#include #include #include #include #include "util.h" #define CASE_ERR(ec) case ec: return #ec; const char * clTestErrorString(cl_int error) { switch(error) { CASE_ERR(CL_DEVICE_NOT_AVAILABLE); CASE_ERR(CL_DEVICE_NOT_FOUND); CASE_ERR(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST); CASE_ERR(CL_INVALID_ARG_INDEX); CASE_ERR(CL_INVALID_ARG_SIZE); CASE_ERR(CL_INVALID_ARG_VALUE); CASE_ERR(CL_INVALID_COMMAND_QUEUE); CASE_ERR(CL_INVALID_CONTEXT); CASE_ERR(CL_INVALID_DEVICE); CASE_ERR(CL_INVALID_DEVICE_TYPE); CASE_ERR(CL_INVALID_EVENT_WAIT_LIST); CASE_ERR(CL_INVALID_GLOBAL_OFFSET); CASE_ERR(CL_INVALID_GLOBAL_WORK_SIZE); CASE_ERR(CL_INVALID_IMAGE_SIZE); CASE_ERR(CL_INVALID_KERNEL); CASE_ERR(CL_INVALID_KERNEL_ARGS); CASE_ERR(CL_INVALID_KERNEL_DEFINITION); CASE_ERR(CL_INVALID_KERNEL_NAME); CASE_ERR(CL_INVALID_MEM_OBJECT); CASE_ERR(CL_INVALID_OPERATION); CASE_ERR(CL_INVALID_PLATFORM); CASE_ERR(CL_INVALID_PROGRAM); CASE_ERR(CL_INVALID_PROGRAM_EXECUTABLE); CASE_ERR(CL_INVALID_PROPERTY); CASE_ERR(CL_INVALID_SAMPLER); CASE_ERR(CL_INVALID_VALUE); CASE_ERR(CL_INVALID_WORK_DIMENSION); CASE_ERR(CL_INVALID_WORK_GROUP_SIZE); CASE_ERR(CL_INVALID_WORK_ITEM_SIZE); CASE_ERR(CL_MEM_OBJECT_ALLOCATION_FAILURE); CASE_ERR(CL_MISALIGNED_SUB_BUFFER_OFFSET); CASE_ERR(CL_OUT_OF_HOST_MEMORY); CASE_ERR(CL_OUT_OF_RESOURCES); CASE_ERR(CL_SUCCESS); } } unsigned clTestInitGpuDevice(cl_device_id * device_id) { cl_int error; cl_uint total_platforms; cl_platform_id platform_id; cl_uint total_gpu_devices; error = clGetPlatformIDs( 1, /* Max number of platform IDs to return */ &platform_id, /* Pointer to platform_id */ &total_platforms); /* Total number of platforms * found on the system */ if (error != CL_SUCCESS) { fprintf(stderr, "clGetPlatformIDs() failed: %s\n", clTestErrorString(error)); return 0; } fprintf(stderr, "There are %u platforms.\n", total_platforms); error = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, device_id, &total_gpu_devices); if (error != CL_SUCCESS) { fprintf(stderr, "clGetDeviceIDs() failed: %s\n", clTestErrorString(error)); return 0; } fprintf(stderr, "There are %u GPU devices.\n", total_gpu_devices); return 1; } unsigned clTestCreateContext(cl_context * context, cl_device_id device_id) { cl_int error; *context = clCreateContext(NULL, /* Properties */ 1, /* Number of devices */ &device_id, /* Device pointer */ NULL, /* Callback for reporting errors */ NULL, /* User data to pass to error callback */ &error); /* Error code */ if (error != CL_SUCCESS) { fprintf(stderr, "clCreateContext() failed: %s\n", clTestErrorString(error)); return 0; } return 1; } unsigned clTestCreateCommandQueue(cl_command_queue * command_queue, cl_context context, cl_device_id device_id) { cl_int error; *command_queue = clCreateCommandQueue(context, device_id, 0, /* Command queue properties */ &error); /* Error code */ if (error != CL_SUCCESS) { fprintf(stderr, "clCreateCommandQueue() failed: %s\n", clTestErrorString(error)); return 0; } return 1; } #define CODE_CHUNK 64 unsigned clTestCreateKernel(cl_context context, cl_device_id device_id, cl_kernel * kernel, const char * kernel_name) { char * filename; char * code = NULL; size_t code_len = 0; int fd; int bytes_read; /* +3 .cl * +1 NULL byte * -- * +4 */ unsigned filename_len = strlen(kernel_name) + 4; cl_int error; cl_program program; /* Determine file name */ filename = malloc (filename_len + 4); if (!filename) { fprintf(stderr, "Failed to malloc filename.\n"); return 0; } snprintf(filename, filename_len, "%s.cl", kernel_name); /* Open file */ fd = open(filename, O_RDONLY); if (fd < 0) { fprintf(stderr, "Failed to open file: %s\n", filename); return 0; } /* Read code */ do { code = realloc(code, (code_len + CODE_CHUNK) * sizeof(unsigned char)); if (!code) { fprintf(stderr, "Failed to realloc code.\n"); return 0; } bytes_read = read(fd, code + code_len, CODE_CHUNK); if (bytes_read < 0) { fprintf(stderr, "Failed to read code.\n"); return 0; } code_len += bytes_read; } while(bytes_read == CODE_CHUNK); /* Create program */ program = clCreateProgramWithSource(context, 1, /* Number of strings */ &code, &code_len, /* String lengths, 0 means all the * strings are NULL terminated. */ &error); if (error != CL_SUCCESS) { fprintf(stderr, "clCreateProgramWithSource() failed: %s\n", clTestErrorString(error)); return 0; } fprintf(stderr, "clCreateProgramWithSource() suceeded.\n"); /* Build program */ error = clBuildProgram(program, 1, /* Number of devices */ &device_id, NULL, /* options */ NULL, /* callback function when compile is complete */ NULL); /* user data for callback */ if (error != CL_SUCCESS) { char build_str[10000]; error = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 10000, /* Size of output string */ build_str, /* pointer to write the log to */ NULL); /* Number of bytes written to the log */ if (error != CL_SUCCESS) { fprintf(stderr, "clGetProgramBuildInfo() failed: %s\n", clTestErrorString(error)); } else { fprintf(stderr, "Build Log: \n%s\n\n", build_str); } return 0; } fprintf(stderr, "clBuildProgram() suceeded.\n"); *kernel = clCreateKernel(program, kernel_name, &error); if (error != CL_SUCCESS) { fprintf(stderr, "clCreateKernel() failed: %s\n", clTestErrorString(error)); return 0; } fprintf(stderr, "clCreateKernel() suceeded.\n"); return 1; } unsigned clTestSimpleInit(struct cltest_context * context, const char * kernel_name) { if (!clTestInitGpuDevice(&context->device_id)) { return 0; } if (!clTestCreateContext(&context->cl_ctx, context->device_id)) { return 0; } if (!clTestCreateCommandQueue(&context->command_queue, context->cl_ctx, context->device_id)) { return 0; } if (!clTestCreateKernel(context->cl_ctx, context->device_id, &context->kernel, kernel_name)) { return 0; } return 1; } unsigned clTestSetOutputBuffer(struct cltest_context * context, unsigned buffer_size) { cl_int error; context->out_buffer = clCreateBuffer(context->cl_ctx, CL_MEM_WRITE_ONLY, /* Flags */ buffer_size, /* Size of buffer */ NULL, /* Pointer to the data */ &error); /* error code */ if (error != CL_SUCCESS) { fprintf(stderr, "clCreateBuffer() failed: %s\n", clTestErrorString(error)); return 0; } if (!clTestKernelSetArg(context->kernel, 0, sizeof(cl_mem), &context->out_buffer)) { return 0; } return 1; } unsigned clTestKernelSetArg(cl_kernel kernel, cl_uint index, size_t size, const void * value) { cl_int error; error = clSetKernelArg(kernel, index, size, value); if (error != CL_SUCCESS) { fprintf(stderr, "clSetKernelArg failed: %s\n", clTestErrorString(error)); return 0; } fprintf(stderr, "clSetKernelArg() succeeded.\n"); return 1; } unsigned clTestEnqueueNDRangeKernel(cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t * global_work_size, const size_t * local_work_size) { cl_int error = clEnqueueNDRangeKernel(command_queue, kernel, work_dim, /* Number of dimensions */ NULL, /* Global work offset */ global_work_size, local_work_size, /* local work size */ 0, /* Events in wait list */ NULL, /* Wait list */ NULL); /* Event object for this event */ if (error != CL_SUCCESS) { fprintf(stderr, "clEnqueueNDRangeKernel() failed: %s\n", clTestErrorString(error)); return 0; } return 1; }