summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Makefile 8
-rw-r--r-- mat_mul.c 103
-rw-r--r-- mat_mul.cl 19
3 files changed, 129 insertions, 1 deletions
diff --git a/Makefile b/Makefile
index e5719e4..e7e14dc 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,7 @@ COMMON_OBJECTS = cl_simple.o cl_util.o
LDFLAGS = -L/usr/local/lib -lOpenCL
-all: hello_world math-int get-global-id get-global-id-2d loop
+all: hello_world math-int get-global-id get-global-id-2d loop get-global-id-3d mat-mul
hello_world: hello_world.o $(COMMON_OBJECTS)
gcc -o $@ $^ $(LDFLAGS)
@@ -20,3 +20,9 @@ get-global-id-2d: get_global_id_2d.o $(COMMON_OBJECTS)
loop: loop.o $(COMMON_OBJECTS)
gcc -o $@ $^ $(LDFLAGS)
+
+# Link get-global-id-3d from get_global_id_3d.o plus the shared
+# $(COMMON_OBJECTS), against the libraries named in $(LDFLAGS).
+# NOTE(review): the source for get_global_id_3d.o is not added by this
+# change; presumably it is already in the tree -- confirm, since "all" now
+# depends on this target.
+get-global-id-3d: get_global_id_3d.o $(COMMON_OBJECTS)
+ gcc -o $@ $^ $(LDFLAGS)
+
+# Link mat-mul (host program in mat_mul.c) from mat_mul.o plus the shared
+# $(COMMON_OBJECTS), against the libraries named in $(LDFLAGS).
+mat-mul: mat_mul.o $(COMMON_OBJECTS)
+ gcc -o $@ $^ $(LDFLAGS)
diff --git a/mat_mul.c b/mat_mul.c
new file mode 100644
index 0000000..fbe7cc8
--- /dev/null
+++ b/mat_mul.c
@@ -0,0 +1,103 @@
+
+#include <CL/cl.h>
+#include "cl_simple.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+/*
+ * Host driver for the mat_mul kernel.
+ *
+ * Uploads the row-major matrices A (heightA x widthA) and B (heightB x
+ * widthB), runs the kernel over a widthB x heightA grid of work-items, reads
+ * back the heightA x widthB product and prints it to stderr, one matrix row
+ * per line.
+ *
+ * argc and argv are unused.
+ *
+ * Returns EXIT_SUCCESS on success; EXIT_FAILURE if the matrix dimensions are
+ * inconsistent, the host result buffer cannot be allocated, or any
+ * clSimple* helper reports failure.
+ */
+int main(int argc, char ** argv)
+{
+    struct cl_simple_context context;
+    cl_mem inputA, inputB;
+    size_t global_work_size[2];
+    size_t widthA, heightA, widthB, heightB;
+    size_t a_bytes, b_bytes;
+    size_t out_size, out_bytes;
+    cl_int arg_widthA, arg_heightA, arg_widthB, arg_heightB;
+    int *out;
+    size_t i;
+    int status = EXIT_FAILURE;
+
+    /*
+     * Larger sample input, kept for reference.  A is 2 rows x 4 columns and
+     * B is 4 rows x 3 columns; to use it, also set widthA = 4, heightA = 2,
+     * widthB = 3 and heightB = 4 below.
+     *
+     * int A[8] = {1, 3, 4, 7,
+     *             2, 4, 6, 8};
+     *
+     * int B[12] = {1, 8, 9,
+     *              2, 7, 10,
+     *              3, 6, 11,
+     *              4, 5, 12};
+     */
+    int A[1] = {5};
+    int B[1] = {3};
+
+    (void)argc;
+    (void)argv;
+
+    widthA = 1;
+    heightA = 1;
+
+    widthB = 1;
+    heightB = 1;
+
+    a_bytes = sizeof(int) * widthA * heightA;
+    b_bytes = sizeof(int) * widthB * heightB;
+
+    /*
+     * A x B is only defined when A has as many columns as B has rows, and
+     * the declared dimensions must describe the arrays that get uploaded.
+     */
+    if (widthA != heightB || sizeof(A) != a_bytes || sizeof(B) != b_bytes) {
+        fprintf(stderr, "mat_mul: inconsistent matrix dimensions\n");
+        return EXIT_FAILURE;
+    }
+
+    /* One work-item per output element: x indexes columns, y indexes rows. */
+    global_work_size[0] = widthB;
+    global_work_size[1] = heightA;
+
+    /* The product has heightA rows and widthB columns. */
+    out_size = heightA * widthB;
+    out_bytes = sizeof *out * out_size;
+
+    out = malloc(out_bytes);
+    if (!out) {
+        fprintf(stderr, "mat_mul: out of memory\n");
+        return EXIT_FAILURE;
+    }
+
+    if (!clSimpleSimpleInit(&context, "mat_mul")) {
+        goto done;
+    }
+
+    if (   !clSimpleCreateBuffer(&inputA, context.cl_ctx, CL_MEM_READ_ONLY,
+                                 a_bytes)
+        || !clSimpleCreateBuffer(&inputB, context.cl_ctx, CL_MEM_READ_ONLY,
+                                 b_bytes)) {
+        goto done;
+    }
+
+    if (   !clSimpleEnqueueWriteBuffer(context.command_queue, inputA,
+                                       sizeof(A), A)
+        || !clSimpleEnqueueWriteBuffer(context.command_queue, inputB,
+                                       sizeof(B), B)) {
+        goto done;
+    }
+
+    /* Presumably binds the output buffer as kernel argument 0 -- the
+     * explicit arguments below start at index 1. */
+    if (!clSimpleSetOutputBuffer(&context, out_bytes)) {
+        goto done;
+    }
+
+    /*
+     * The kernel declares its dimension parameters as int, so hand it real
+     * cl_int objects.  Passing the address of a size_t with sizeof(int)
+     * would copy the wrong bytes on big-endian hosts.
+     */
+    arg_widthA = (cl_int)widthA;
+    arg_heightA = (cl_int)heightA;
+    arg_widthB = (cl_int)widthB;
+    arg_heightB = (cl_int)heightB;
+
+    if (   !clSimpleKernelSetArg(context.kernel, 1, sizeof(cl_int), &arg_widthA)
+        || !clSimpleKernelSetArg(context.kernel, 2, sizeof(cl_int), &arg_heightA)
+        || !clSimpleKernelSetArg(context.kernel, 3, sizeof(cl_int), &arg_widthB)
+        || !clSimpleKernelSetArg(context.kernel, 4, sizeof(cl_int), &arg_heightB)
+        || !clSimpleKernelSetArg(context.kernel, 5, sizeof(cl_mem), &inputA)
+        || !clSimpleKernelSetArg(context.kernel, 6, sizeof(cl_mem), &inputB)) {
+        goto done;
+    }
+
+    /*
+     * NOTE(review): global_work_size is passed twice; presumably the last
+     * argument is the local work-group size, which would put the whole
+     * matrix in a single work-group and cap its size at the device limit --
+     * confirm against cl_simple.h before using larger inputs.
+     */
+    if (!clSimpleEnqueueNDRangeKernel(context.command_queue,
+                                      context.kernel,
+                                      2, global_work_size, global_work_size)) {
+        goto done;
+    }
+
+    if (!clSimpleReadOutput(&context, out, out_bytes)) {
+        goto done;
+    }
+
+    /* Print row-major, breaking the line after each widthB-element row. */
+    for (i = 0; i < out_size; i++) {
+        fprintf(stderr, "%d ", out[i]);
+        if ((i + 1) % widthB == 0) {
+            fprintf(stderr, "\n");
+        }
+    }
+
+    status = EXIT_SUCCESS;
+
+done:
+    free(out);
+    return status;
+}
diff --git a/mat_mul.cl b/mat_mul.cl
new file mode 100644
index 0000000..565a36f
--- /dev/null
+++ b/mat_mul.cl
@@ -0,0 +1,19 @@
+/*
+ * mat_mul: each work-item computes one element of out.
+ *
+ * Global id 0 selects the column and global id 1 selects the row.  The
+ * element is the sum over i in [0, widthA) of
+ * inputA[row * widthA + i] * inputB[i * widthB + col], stored at
+ * out[row * widthB + col].  heightA and heightB are accepted but not read
+ * by the kernel body.
+ */
+__kernel void mat_mul(
+    __global int* out,
+    int widthA,
+    int heightA,
+    int widthB,
+    int heightB,
+    __global int * inputA,
+    __global int * inputB)
+{
+    const int c = get_global_id(0);
+    const int r = get_global_id(1);
+
+    /* Offset of the first element of row r in inputA. */
+    const int a_base = r * widthA;
+
+    int acc = 0;
+    int k = 0;
+
+    /* Walk along row r of inputA and down column c of inputB. */
+    while (k < widthA) {
+        acc += inputA[a_base + k] * inputB[k * widthB + c];
+        k++;
+    }
+
+    out[r * widthB + c] = acc;
+}