#include #include "cl_simple.h" #include #include int main(int argc, char ** argv) { unsigned i,j; struct cl_simple_context context; size_t widthA, heightA, widthB, heightB; int * out; int out_size; int out_bytes; cl_mem inputA, inputB; size_t global_work_size[2]; size_t a_bytes; size_t b_bytes; /* 2 x 4 */ int A[8] = {1, 3, 5, 7, 2, 4, 6, 8}; /* 4 x 3 */ int B[12] = {1, 8, 9, 2, 7, 10, 3, 6, 11, 4, 5, 12}; heightA = 2; widthA = 4; heightB = 4; widthB = 3; global_work_size[0] = widthB; global_work_size[1] = heightA; out_size = heightA * widthB; out_bytes = sizeof(int) * out_size; a_bytes = sizeof(int) * widthA * heightA; b_bytes = sizeof(int) * widthB * heightB; out = malloc(out_bytes); if (!clSimpleSimpleInit(&context, "mat_mul")) { return EXIT_FAILURE; } if ( !clSimpleCreateBuffer(&inputA, context.cl_ctx, CL_MEM_READ_ONLY, a_bytes) || !clSimpleCreateBuffer(&inputB, context.cl_ctx, CL_MEM_READ_ONLY, b_bytes)) { return EXIT_FAILURE; } if ( !clSimpleEnqueueWriteBuffer(context.command_queue, inputA, sizeof(A), A) || !clSimpleEnqueueWriteBuffer(context.command_queue, inputB, sizeof(B), B)) { return EXIT_FAILURE; } if (!clSimpleSetOutputBuffer(&context, out_bytes)) { return EXIT_FAILURE; } if ( !clSimpleKernelSetArg(context.kernel, 1, sizeof(int), &widthA) || !clSimpleKernelSetArg(context.kernel, 2, sizeof(int), &heightA) || !clSimpleKernelSetArg(context.kernel, 3, sizeof(int), &widthB) || !clSimpleKernelSetArg(context.kernel, 4, sizeof(int), &heightB) || !clSimpleKernelSetArg(context.kernel, 5, sizeof(cl_mem), &inputA) || !clSimpleKernelSetArg(context.kernel, 6, sizeof(cl_mem), &inputB)) { return EXIT_FAILURE; } if (!clSimpleEnqueueNDRangeKernel(context.command_queue, context.kernel, 2, global_work_size, global_work_size)) { return EXIT_FAILURE; } if (!clSimpleReadOutput(&context, out, out_bytes)) { return EXIT_FAILURE; } for (i = 0; i < out_size; i++) { fprintf(stderr, "%d ", out[i]); if ((i + 1) % widthB == 0) { fprintf(stderr, "\n"); } } }