From 568f7fcf040bca4b10f61cfb13279780ba5b1051 Mon Sep 17 00:00:00 2001
From: Tom Stellard
Date: Mon, 26 Mar 2012 15:56:24 -0400
Subject: Add matrix multiply

---
 Makefile   |   8 +++-
 mat_mul.c  | 149 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 mat_mul.cl |  33 +++++++++++++
 3 files changed, 189 insertions(+), 1 deletion(-)
 create mode 100644 mat_mul.c
 create mode 100644 mat_mul.cl

diff --git a/Makefile b/Makefile
index e5719e4..e7e14dc 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,7 @@ COMMON_OBJECTS = cl_simple.o cl_util.o
 
 LDFLAGS = -L/usr/local/lib -lOpenCL
 
-all: hello_world math-int get-global-id get-global-id-2d loop
+all: hello_world math-int get-global-id get-global-id-2d loop get-global-id-3d mat-mul
 
 hello_world: hello_world.o $(COMMON_OBJECTS)
 	gcc -o $@ $^ $(LDFLAGS)
@@ -20,3 +20,9 @@ get-global-id-2d: get_global_id_2d.o $(COMMON_OBJECTS)
 
 loop: loop.o $(COMMON_OBJECTS)
 	gcc -o $@ $^ $(LDFLAGS)
+
+get-global-id-3d: get_global_id_3d.o $(COMMON_OBJECTS)
+	gcc -o $@ $^ $(LDFLAGS)
+
+mat-mul: mat_mul.o $(COMMON_OBJECTS)
+	gcc -o $@ $^ $(LDFLAGS)
diff --git a/mat_mul.c b/mat_mul.c
new file mode 100644
index 0000000..fbe7cc8
--- /dev/null
+++ b/mat_mul.c
@@ -0,0 +1,149 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <CL/cl.h>
+
+#include "cl_simple.h"
+
+/*
+ * Multiply matrix A (heightA rows x widthA columns) by matrix B (heightB rows
+ * x widthB columns) with the "mat_mul" OpenCL kernel and print the heightA x
+ * widthB product to stderr, one matrix row per line.
+ *
+ * Returns EXIT_SUCCESS once the product has been printed, and EXIT_FAILURE if
+ * the dimensions are inconsistent, the host allocation fails, or any
+ * cl_simple call reports failure.
+ */
+int main(int argc, char ** argv)
+{
+	size_t i;
+	int status = EXIT_FAILURE;
+
+	struct cl_simple_context context;
+
+	/* The kernel declares its dimension parameters as int, so the host copies
+	 * are int too: the size given to clSimpleKernelSetArg() then matches the
+	 * object the pointer refers to. */
+	int widthA, heightA, widthB, heightB;
+	int * out;
+
+	size_t out_size;
+	size_t out_bytes;
+
+	cl_mem inputA, inputB;
+
+	size_t global_work_size[2];
+
+	size_t a_bytes;
+	size_t b_bytes;
+
+	/*
+	 * Larger example input (A: 2 rows x 4 columns, B: 4 rows x 3 columns):
+	 *
+	 *   int A[8] = {1, 3, 4, 7,
+	 *               2, 4, 6, 8};
+	 *
+	 *   int B[12] = {1, 8, 9,
+	 *                2, 7, 10,
+	 *                3, 6, 11,
+	 *                4, 5, 12};
+	 */
+	int A[1] = {5};
+	int B[1] = {3};
+
+	widthA = 1;
+	heightA = 1;
+
+	widthB = 1;
+	heightB = 1;
+
+	/* The inner dimensions must agree, and each host array must hold exactly
+	 * as many elements as its dimensions describe. */
+	if (widthA <= 0 || heightA <= 0 || widthB <= 0 || widthA != heightB) {
+		fprintf(stderr, "mat_mul: incompatible matrix dimensions\n");
+		return EXIT_FAILURE;
+	}
+
+	a_bytes = sizeof(A[0]) * widthA * heightA;
+	b_bytes = sizeof(B[0]) * widthB * heightB;
+
+	if (sizeof(A) != a_bytes || sizeof(B) != b_bytes) {
+		fprintf(stderr, "mat_mul: input size does not match dimensions\n");
+		return EXIT_FAILURE;
+	}
+
+	/* One work-item per element of the product: dimension 0 indexes the
+	 * column, dimension 1 the row (see get_global_id() in mat_mul.cl). */
+	global_work_size[0] = widthB;
+	global_work_size[1] = heightA;
+
+	/* The product has heightA rows and widthB columns. */
+	out_size = (size_t)heightA * widthB;
+	out_bytes = sizeof(*out) * out_size;
+
+	out = malloc(out_bytes);
+	if (!out) {
+		fprintf(stderr, "mat_mul: out of memory\n");
+		return EXIT_FAILURE;
+	}
+
+	if (!clSimpleSimpleInit(&context, "mat_mul")) {
+		goto done;
+	}
+
+	if (   !clSimpleCreateBuffer(&inputA, context.cl_ctx, CL_MEM_READ_ONLY,
+					a_bytes)
+	    || !clSimpleCreateBuffer(&inputB, context.cl_ctx, CL_MEM_READ_ONLY,
+					b_bytes)) {
+		goto done;
+	}
+
+	if (   !clSimpleEnqueueWriteBuffer(context.command_queue, inputA,
+					a_bytes, A)
+	    || !clSimpleEnqueueWriteBuffer(context.command_queue, inputB,
+					b_bytes, B)) {
+		goto done;
+	}
+
+	if (!clSimpleSetOutputBuffer(&context, out_bytes)) {
+		goto done;
+	}
+
+	/* Argument 0 is the kernel's output buffer and is not set here;
+	 * presumably clSimpleSetOutputBuffer() binds it - confirm in cl_simple. */
+	if (   !clSimpleKernelSetArg(context.kernel, 1, sizeof(widthA), &widthA)
+	    || !clSimpleKernelSetArg(context.kernel, 2, sizeof(heightA), &heightA)
+	    || !clSimpleKernelSetArg(context.kernel, 3, sizeof(widthB), &widthB)
+	    || !clSimpleKernelSetArg(context.kernel, 4, sizeof(heightB), &heightB)
+	    || !clSimpleKernelSetArg(context.kernel, 5, sizeof(inputA), &inputA)
+	    || !clSimpleKernelSetArg(context.kernel, 6, sizeof(inputB), &inputB)) {
+		goto done;
+	}
+
+	/* NOTE(review): the same array is passed for both size arguments; if the
+	 * last one is the work-group size, the whole grid runs as one work-group
+	 * and large matrices may exceed the device limit - confirm. */
+	if (!clSimpleEnqueueNDRangeKernel(context.command_queue,
+					context.kernel,
+					2, global_work_size, global_work_size)) {
+		goto done;
+	}
+
+	if (!clSimpleReadOutput(&context, out, out_bytes)) {
+		goto done;
+	}
+
+	for (i = 0; i < out_size; i++) {
+		fprintf(stderr, "%d ", out[i]);
+		/* Break the line after the last column of each output row. */
+		if ((i + 1) % (size_t)widthB == 0) {
+			fprintf(stderr, "\n");
+		}
+	}
+
+	status = EXIT_SUCCESS;
+
+done:
+	free(out);
+	return status;
+}
diff --git a/mat_mul.cl b/mat_mul.cl
new file mode 100644
index 0000000..565a36f
--- /dev/null
+++ b/mat_mul.cl
@@ -0,0 +1,33 @@
+/*
+ * Compute one element of out = inputA * inputB.
+ *
+ * inputA holds heightA rows of widthA ints and inputB holds rows of widthB
+ * ints, both stored row by row; out receives heightA rows of widthB ints.
+ * The work-item's global id selects the element: dimension 0 is the column,
+ * dimension 1 the row.  heightB is not read by the kernel.
+ */
+__kernel void mat_mul(
+	__global int* out,
+	int widthA,
+	int heightA,
+	int widthB,
+	int heightB,
+	__global int * inputA,
+	__global int * inputB)
+{
+	int row = get_global_id(1);
+	int col = get_global_id(0);
+
+	int sum = 0;
+
+	/* Ignore work-items that fall outside the output matrix, e.g. when the
+	 * global size has been rounded up. */
+	if (row >= heightA || col >= widthB) {
+		return;
+	}
+
+	for (int i = 0; i < widthA; i++) {
+		sum += inputA[row * widthA + i] * inputB[i * widthB + col];
+	}
+	out[row * widthB + col] = sum;
+}
-- 
cgit v1.2.3