/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Benjamin Segovia
 */

#include "utest_file_map.hpp"
#include "utest_helper.hpp"
#include "utest_error.h"
#include "CL/cl.h"
#include "CL/cl_intel.h"

/* NOTE(review): the standard header names below were reconstructed from the
 * symbols this file uses (printf, assert, malloc, strlen, sqrtf, std::max,
 * std::string, std::vector, uint32_t) -- confirm against the build. */
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>

/* Print a formatted message on stderr, then abort the test run. assert(0)
 * fires in debug builds; exit(-1) terminates when asserts are compiled out. */
#define FATAL(...) \
do { \
  fprintf(stderr, "error: "); \
  fprintf(stderr, __VA_ARGS__); \
  fprintf(stderr, "\n");\
  assert(0); \
  exit(-1); \
} while (0)

/* Same as FATAL, but only when COND evaluates to true. */
#define FATAL_IF(COND, ...) \
do { \
  if (COND) FATAL(__VA_ARGS__); \
} while (0)

/* Global OpenCL state shared by the unit tests. */
cl_platform_id platform = NULL;
cl_device_id device = NULL;
cl_context ctx = NULL;
cl_program program = NULL;
cl_kernel kernel = NULL;
cl_command_queue queue = NULL;
cl_mem buf[MAX_BUFFER_N] = {};
void *buf_data[MAX_BUFFER_N] = {};
size_t globals[3] = {};
size_t locals[3] = {};

#ifdef HAS_EGL
Display    *xDisplay;
EGLDisplay  eglDisplay;
EGLContext  eglContext = NULL;
EGLSurface  eglSurface;
Window xWindow;

/* Release the EGL context and surface, then the X11 window and display
 * created by init_egl_window(). */
void cl_ocl_destroy_egl_window() {
  eglMakeCurrent(eglDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT);
  eglDestroyContext(eglDisplay, eglContext);
  eglDestroySurface(eglDisplay, eglSurface);
  XDestroyWindow(xDisplay, xWindow);
  XCloseDisplay(xDisplay);
}

/* Open the default X display, create a width x height window, and bind an
 * EGL/OpenGL context to it. Fills the xDisplay, xWindow, eglDisplay,
 * eglSurface and eglContext globals.
 *
 * Returns true on success; on failure prints a diagnostic on stderr and
 * returns false. */
bool init_egl_window(int width, int height) {
  XSetWindowAttributes swa;
  Window win, root;
  EGLint attr[] = {       // some attributes to set up our egl-interface
    EGL_BUFFER_SIZE, 16,
    EGL_RENDERABLE_TYPE, EGL_OPENGL_BIT,
    EGL_NONE
  };
  // egl-contexts collect all state descriptions needed for operation
  EGLint ctxattr[] = {
#if 0
    EGL_CONTEXT_CLIENT_VERSION, 2,
#endif
    EGL_NONE
  };

  EGLConfig ecfg;
  EGLint numConfig;

  eglContext = EGL_NO_CONTEXT;
  xDisplay = XOpenDisplay(NULL);
  if (xDisplay == NULL) {
    fprintf(stderr, "Failed to open DISPLAY.\n");
    return false;
  }
  root = DefaultRootWindow(xDisplay);
  swa.event_mask = ExposureMask | PointerMotionMask | KeyPressMask;

  win = XCreateWindow(
          xDisplay, root, 0, 0, width, height, 0,
          CopyFromParent, InputOutput,
          CopyFromParent, CWEventMask,
          &swa);
  xWindow = win;

  // The EGL part: EGL connects the graphics functionality of OpenGL (ES)
  // with the windowing interface of the native system (X11 in our case).
  eglDisplay = eglGetDisplay( (EGLNativeDisplayType) xDisplay );
  if ( eglDisplay == EGL_NO_DISPLAY ) {
    fprintf(stderr, "Got no EGL display.\n");
    return false;
  }
  eglBindAPI(EGL_OPENGL_API);
  int m,n;
  if ( !eglInitialize( eglDisplay, &m, &n ) ) {
    fprintf(stderr, "Unable to initialize EGL\n");
    return false;
  }
  if ( !eglChooseConfig( eglDisplay, attr, &ecfg, 1, &numConfig ) ) {
    fprintf(stderr, "Failed to choose config (eglError: %d)\n", eglGetError());
    return false;
  }
  if ( numConfig != 1 ) {
    fprintf(stderr, "Didn't get exactly one config, but %d", numConfig);
    return false;
  }
  eglSurface = eglCreateWindowSurface ( eglDisplay, ecfg, win, NULL );
  if ( eglSurface == EGL_NO_SURFACE ) {
    fprintf(stderr, "Unable to create EGL surface (eglError: %d)\n", eglGetError());
    return false;
  }
  eglContext = eglCreateContext ( eglDisplay, ecfg, EGL_NO_CONTEXT, ctxattr );
  if ( eglContext == EGL_NO_CONTEXT ) {
    fprintf(stderr, "Unable to create EGL context (eglError: %d)\n", eglGetError());
    return false;
  }
  // associate the egl-context with the egl-surface
  eglMakeCurrent( eglDisplay, eglSurface, eglSurface, eglContext);

  glClearColor(1.0, 1.0, 1.0, 1.0);
  glClear(GL_COLOR_BUFFER_BIT);
  glFinish();
  eglSwapBuffers(eglDisplay, eglSurface);
  return true;
}
#endif

/* Return the symbolic name of an image channel order, or a fixed
 * "Unsupported ..." string for values not listed here. */
static const char*
cl_test_channel_order_string(cl_channel_order order)
{
  switch(order) {
#define DECL_ORDER(WHICH) case CL_##WHICH: return "CL_"#WHICH
    DECL_ORDER(R);
    DECL_ORDER(A);
    DECL_ORDER(RG);
    DECL_ORDER(RA);
    DECL_ORDER(RGB);
    DECL_ORDER(RGBA);
    DECL_ORDER(BGRA);
    DECL_ORDER(ARGB);
    DECL_ORDER(INTENSITY);
    DECL_ORDER(LUMINANCE);
    DECL_ORDER(Rx);
    DECL_ORDER(RGx);
    DECL_ORDER(RGBx);
#undef DECL_ORDER
    default: return "Unsupported image channel order";
  }
}

/* Return the symbolic name of an image channel data type, or a fixed
 * "Unsupported ..." string for values not listed here. */
static const char*
cl_test_channel_type_string(cl_channel_type type)
{
  switch(type) {
#define DECL_TYPE(WHICH) case CL_##WHICH: return "CL_"#WHICH
    DECL_TYPE(SNORM_INT8);
    DECL_TYPE(SNORM_INT16);
    DECL_TYPE(UNORM_INT8);
    DECL_TYPE(UNORM_INT16);
    DECL_TYPE(UNORM_SHORT_565);
    DECL_TYPE(UNORM_SHORT_555);
    DECL_TYPE(UNORM_INT_101010);
    DECL_TYPE(SIGNED_INT8);
    DECL_TYPE(SIGNED_INT16);
    DECL_TYPE(SIGNED_INT32);
    DECL_TYPE(UNSIGNED_INT8);
    DECL_TYPE(UNSIGNED_INT16);
    DECL_TYPE(UNSIGNED_INT32);
    DECL_TYPE(HALF_FLOAT);
    DECL_TYPE(FLOAT);
#undef DECL_TYPE
    default: return "Unsupported image channel type";
  }
}

/* Print a failure message with its code on stdout and terminate. */
static void
clpanic(const char *msg, int rval)
{
  printf("Failed: %s (%d)\n", msg, rval);
  exit(-1);
}

/* Build "<OCL_KERNEL_PATH>/<file>" in a freshly malloc'ed string.
 *
 * When device is non-NULL its Gen version is queried (failure is fatal), but
 * the sub-directory component is currently empty in both cases.
 * Terminates through clpanic() when OCL_KERNEL_PATH is unset or the
 * allocation fails. The caller owns the returned string and must free() it. */
char*
cl_do_kiss_path(const char *file, cl_device_id device)
{
  cl_int ver;
  const char *sub_path = NULL;
  char *ker_path = NULL;
  const char *kiss_path = getenv("OCL_KERNEL_PATH");
  size_t sz = strlen(file);

  if (device == NULL)
    sub_path = "";
  else {
    if (clGetGenVersionIntel(device, &ver) != CL_SUCCESS)
      clpanic("Unable to get Gen version", -1);
    sub_path = "";
  }

  if (kiss_path == NULL)
    clpanic("set OCL_KERNEL_PATH. This is where the kiss kernels are", -1);
  sz += strlen(kiss_path) + strlen(sub_path) + 2; /* +1 for end of string, +1 for '/' */
  if ((ker_path = (char*) malloc(sz)) == NULL)
    clpanic("Allocation failed", -1);
  /* Bounded by the size computed above. */
  snprintf(ker_path, sz, "%s/%s%s", kiss_path, sub_path, file);
  return ker_path;
}

/* (Re)create the global program from file_name when needed, build it, and
 * create the global kernel named kernel_name.
 *
 * The program is recreated when none exists yet or when file_name differs
 * from the one remembered from the previous call. Note that only the pointer
 * is remembered, not a copy of the string. format selects LLVM or SOURCE
 * input; any other value is fatal. build_opt is forwarded to clBuildProgram.
 *
 * Returns CL_SUCCESS, or the OpenCL status of the failing create call. */
int
cl_kernel_init(const char *file_name, const char *kernel_name, int format, const char * build_opt)
{
  cl_file_map_t *fm = NULL;
  char *ker_path = NULL;
  cl_int status = CL_SUCCESS;
  static const char *prevFileName = NULL;

  /* Load the program and build it */
  if (!program || (program && (!prevFileName || strcmp(prevFileName, file_name)))) {
    if (program) clReleaseProgram(program);
    ker_path = cl_do_kiss_path(file_name, device);
    if (format == LLVM)
      program = clCreateProgramWithLLVMIntel(ctx, 1, &device, ker_path, &status);
    else if (format == SOURCE) {
      fm = cl_file_map_new();
      FATAL_IF (cl_file_map_open(fm, ker_path) != CL_FILE_MAP_SUCCESS,
                "Failed to open file \"%s\" with kernel \"%s\". "
                "Did you properly set OCL_KERNEL_PATH variable?",
                file_name, kernel_name);
      const char *src = cl_file_map_begin(fm);
      const size_t sz = cl_file_map_size(fm);
      program = clCreateProgramWithSource(ctx, 1, &src, &sz, &status);
      /* Release the mapping right away and forget it, so the common exit
       * path below does not delete it a second time. */
      cl_file_map_delete(fm);
      fm = NULL;
    } else
      FATAL("Not able to create program from binary");

    if (status != CL_SUCCESS) {
      fprintf(stderr, "error calling clCreateProgramWithBinary\n");
      goto error;
    }
    prevFileName = file_name;
  }

  /* OCL requires to build the program even if it is created from a binary */
  OCL_CALL (clBuildProgram, program, 1, &device, build_opt, NULL, NULL);

  /* Create a kernel from the program */
  if (kernel)
    clReleaseKernel(kernel);
  kernel = clCreateKernel(program, kernel_name, &status);
  if (status != CL_SUCCESS) {
    fprintf(stderr, "error calling clCreateKernel\n");
    goto error;
  }

exit:
  free(ker_path);
  cl_file_map_delete(fm);
  return status;
error:
  /* Force the program to be reloaded on the next call. */
  prevFileName = NULL;
  goto exit;
}

/* Query the string property CL_PLATFORM_<NAME> of the global platform and
 * print it as: platform_<LOWER_NAME> "<value>". */
#define GET_PLATFORM_STR_INFO(LOWER_NAME, NAME) \
  { \
    size_t param_value_size; \
    OCL_CALL (clGetPlatformInfo, platform, CL_PLATFORM_##NAME, 0, 0, &param_value_size); \
    std::vector<char> param_value(param_value_size); \
    OCL_CALL (clGetPlatformInfo, platform, CL_PLATFORM_##NAME, \
              param_value_size, param_value.empty() ? NULL : &param_value.front(), \
              &param_value_size); \
    std::string str; \
    if (!param_value.empty()) \
      str = std::string(&param_value.front(), param_value_size-1); \
    printf("platform_" #LOWER_NAME " \"%s\"\n", str.c_str()); \
  }

/* NOTE(review): header name reconstructed (std::strstr is used below). */
#include <cstring>

/* Query the string property CL_DEVICE_<NAME> of the global device, store it
 * in a new local std::string named <LOWER_NAME>Str, and print it as:
 * device_<LOWER_NAME> "<value>". Expects a size_t param_value_size in scope. */
#define GET_DEVICE_STR_INFO(LOWER_NAME, NAME) \
    std::string LOWER_NAME ##Str; \
    OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_##NAME, 0, 0, &param_value_size); \
    { \
      std::vector<char> param_value(param_value_size); \
      OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_##NAME, \
                param_value_size, param_value.empty() ? NULL : &param_value.front(), \
                &param_value_size); \
      if (!param_value.empty()) \
        LOWER_NAME ##Str = std::string(&param_value.front(), param_value_size-1); \
    } \
    printf("device_" #LOWER_NAME " \"%s\"\n", LOWER_NAME ##Str.c_str());

/* Initialise the global platform, device, context and command queue, printing
 * platform/device properties and the supported 2D image formats on the way.
 *
 * With HAS_EGL, when the device advertises cl_khr_gl_sharing, the context is
 * created with platform (and, if the EGL window comes up, EGL/GL) properties.
 *
 * Returns CL_SUCCESS, or the status of the failing clCreateContext /
 * clCreateCommandQueue call. */
int
cl_ocl_init(void)
{
  cl_int status = CL_SUCCESS;
  cl_uint platform_n;
  size_t i;
#ifdef HAS_EGL
  bool hasGLExt = false;
#endif
  cl_context_properties *props = NULL;
  /* Declared up front so that no goto below jumps over an initialisation;
   * fmt_n starts at 0 in case the format query fails without writing it. */
  cl_image_format fmt[256];
  cl_uint fmt_n = 0;

  /* Get the platform number */
  OCL_CALL (clGetPlatformIDs, 0, NULL, &platform_n);
  printf("platform number %u\n", platform_n);
  assert(platform_n >= 1);

  /* Get a valid platform */
  OCL_CALL (clGetPlatformIDs, 1, &platform, &platform_n);
  GET_PLATFORM_STR_INFO(profile, PROFILE);
  GET_PLATFORM_STR_INFO(name, NAME);
  GET_PLATFORM_STR_INFO(vendor, VENDOR);
  GET_PLATFORM_STR_INFO(version, VERSION);
  GET_PLATFORM_STR_INFO(extensions, EXTENSIONS);

  /* Get the device (only GPU device is supported right now) */
  OCL_CALL (clGetDeviceIDs, platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
  {
    size_t param_value_size;
    GET_DEVICE_STR_INFO(profile, PROFILE);
    GET_DEVICE_STR_INFO(name, NAME);
    GET_DEVICE_STR_INFO(vendor, VENDOR);
    GET_DEVICE_STR_INFO(version, VERSION);
    GET_DEVICE_STR_INFO(extensions, EXTENSIONS);
    GET_DEVICE_STR_INFO(opencl_c_version, OPENCL_C_VERSION);
#ifdef HAS_EGL
    if (std::strstr(extensionsStr.c_str(), "cl_khr_gl_sharing")) {
      hasGLExt = true;
    }
#endif
  }

#ifdef HAS_EGL
  if (hasGLExt) {
    int n = 0;
    /* 2 (platform) + 4 (EGL display, GL context) + 1 (terminator) */
    props = new cl_context_properties[7];
    props[n++] = CL_CONTEXT_PLATFORM;
    props[n++] = (cl_context_properties)platform;
    if (init_egl_window(EGL_WINDOW_WIDTH, EGL_WINDOW_HEIGHT)) {
      props[n++] = CL_EGL_DISPLAY_KHR;
      props[n++] = (cl_context_properties)eglGetCurrentDisplay();
      props[n++] = CL_GL_CONTEXT_KHR;
      props[n++] = (cl_context_properties)eglGetCurrentContext();
    }
    props[n++] = 0;
  }
#endif

  /* Now create a context */
  ctx = clCreateContext(props, 1, &device, NULL, NULL, &status);
  if (status != CL_SUCCESS) {
    fprintf(stderr, "error calling clCreateContext\n");
    goto error;
  }

  /* All image types currently supported by the context */
  clGetSupportedImageFormats(ctx, 0, CL_MEM_OBJECT_IMAGE2D, 256, fmt, &fmt_n);
  printf("%u image formats are supported\n", fmt_n);
  for (i = 0; i < fmt_n; ++i)
    printf("[%s %s]\n",
      cl_test_channel_order_string(fmt[i].image_channel_order),
      cl_test_channel_type_string(fmt[i].image_channel_data_type));

  /* We are going to push NDRange kernels here */
  queue = clCreateCommandQueue(ctx, device, 0, &status);
  if (status != CL_SUCCESS) {
    fprintf(stderr, "error calling clCreateCommandQueue\n");
    goto error;
  }

error:
  /* props was allocated with new[], so it must be released with delete[]. */
  delete[] props;
  return status;
}

/* Initialise OpenCL, then load and build the given kernel with no build
 * options. Returns the first non-success status, or CL_SUCCESS. */
int
cl_test_init(const char *file_name, const char *kernel_name, int format)
{
  cl_int status = CL_SUCCESS;

  /* Initialize OCL */
  if ((status = cl_ocl_init()) != CL_SUCCESS)
    goto error;

  /* Load the kernel */
  if ((status = cl_kernel_init(file_name, kernel_name, format, NULL)) != CL_SUCCESS)
    goto error;

error:
  return status;
}

/* Release the global kernel and, when needDestroyProgram is set, the global
 * program too. Released handles are reset to NULL. */
void
cl_kernel_destroy(bool needDestroyProgram)
{
  if (kernel) {
    clReleaseKernel(kernel);
    kernel = NULL;
  }
  if (needDestroyProgram && program) {
    clReleaseProgram(program);
    program = NULL;
  }
}

/* Release the global command queue and context and, with HAS_EGL, the EGL
 * window if one was created. */
void
cl_ocl_destroy(void)
{
  clReleaseCommandQueue(queue);
  clReleaseContext(ctx);
#ifdef HAS_EGL
  if (eglContext != NULL) {
    cl_ocl_destroy_egl_window();
    eglContext = NULL;
  }
#endif
}

/* Tear down kernel and OpenCL state, then print the count returned by
 * clReportUnfreedIntel() and assert that it is zero. */
void
cl_test_destroy(void)
{
  cl_kernel_destroy();
  cl_ocl_destroy();
  printf("%i memory leaks\n", clReportUnfreedIntel());
  assert(clReportUnfreedIntel() == 0);
}

/* Unmap every buffer that still has a mapping recorded in buf_data and
 * release every buffer in buf; both tables are reset to NULL. */
void
cl_buffer_destroy(void)
{
  int i;
  for (i = 0; i < MAX_BUFFER_N; ++i) {
    if (buf_data[i] != NULL) {
      clUnmapBufferIntel(buf[i]);
      buf_data[i] = NULL;
    }
    if (buf[i] != NULL) {
      clReleaseMemObject(buf[i]);
      buf[i] = NULL;
    }
  }
}

/* Map the perf buffer and print 48 counters three times: the values at the
 * start of the buffer ("BEFORE"), the values 128 words further ("AFTER"),
 * and their difference ("DIFF"). Does nothing when perf is NULL. */
void
cl_report_perf_counters(cl_mem perf)
{
  cl_int status = CL_SUCCESS;
  uint32_t *start = NULL, *end = NULL;
  uint32_t i;
  if (perf == NULL)
    return;
  start = (uint32_t*) clMapBufferIntel(perf, &status);
  assert(status == CL_SUCCESS && start != NULL);
  end = start + 128;

  printf("BEFORE\n");
  for (i = 0; i < 6*8; ++i) {
    if (i % 8 == 0) printf("\n");
    printf("[%3u 0x%8x] ", i, start[i]);
  }
  printf("\n\n");

  printf("AFTER\n");
  for (i = 0; i < 6*8; ++i) {
    if (i % 8 == 0) printf("\n");
    printf("[%3u 0x%8x] ", i, end[i]);
  }
  printf("\n\n");

  printf("DIFF\n");
  for (i = 0; i < 6*8; ++i) {
    if (i % 8 == 0) printf("\n");
    /* %i expects an int, so convert the unsigned difference explicitly. */
    printf("[%3u %8i] ", i, (int) (end[i] - start[i]));
  }
  printf("\n\n");

  clUnmapBufferIntel(perf);
}

struct bmphdr {
  //   2 bytes of magic here, "BM", total header size is 54 bytes!
  int filesize;      //   4 total file size incl header
  short as0, as1;    //   8 app specific
  int bmpoffset;     //  12 offset of bmp data
  int headerbytes;   //  16 bytes in header from this point (40 actually)
  int width;         //  20
  int height;        //  24
  short nplanes;     //  26 no of color planes
  short bpp;         //  28 bits/pixel
  int compression;   //  32 BI_RGB = 0 = no compression
  int sizeraw;       //  36 size of raw bmp file, excluding header, incl padding
  int hres;          //  40 horz resolutions pixels/meter
  int vres;          //  44
  int npalcolors;    //  48 No of colors in palette
  int nimportant;    //  52 No of important colors
  // raw b, g, r data here, dword aligned per scan line
};

/* Read the BMP file "<OCL_KERNEL_PATH>/<filename>" into a malloc'ed array of
 * width*height ints, one 0xffBBGGRR value per pixel, in file row order.
 *
 * Three bytes (b, g, r) are consumed per pixel; the header's bpp field is not
 * checked. Open/format problems trip an assert. *width and *height receive
 * the image size. The caller owns the returned array and must free() it. */
int *cl_read_bmp(const char *filename, int *width, int *height)
{
  struct bmphdr hdr;
  char *bmppath = cl_do_kiss_path(filename, device);
  FILE *fp = fopen(bmppath, "rb");
  assert(fp);

  char magic[2];
  size_t ret;
  ret = fread(&magic[0], 1, 2, fp);
  assert(2 == ret);
  assert(magic[0] == 'B' && magic[1] == 'M');

  ret = fread(&hdr, sizeof(hdr), 1, fp);
  assert(1 == ret);

  assert(hdr.width > 0 && hdr.height > 0 && hdr.nplanes == 1 && hdr.compression == 0);

  int *rgb32 = (int *) malloc((size_t) hdr.width * hdr.height * sizeof(int));
  assert(rgb32);

  /* Each scanline is padded to a multiple of 4 *bytes*; with 3 bytes per
   * pixel the padding depends on width*3, not on the pixel count. */
  const int pad = (4 - (hdr.width * 3) % 4) % 4;

  int *dst = rgb32;
  for (int y = 0; y < hdr.height; y++) {
    for (int x = 0; x < hdr.width; x++) {
      assert(!feof(fp));
      int b = (getc(fp) & 0x0ff);
      int g = (getc(fp) & 0x0ff);
      int r = (getc(fp) & 0x0ff);
      *dst++ = (r | (g << 8) | (b << 16) | 0xff000000);  /* abgr */
    }
    for (int k = 0; k < pad; k++)
      getc(fp);
  }
  fclose(fp);
  *width = hdr.width;
  *height = hdr.height;
  free(bmppath);
  return rgb32;
}

/* Write width*height pixels from data as an uncompressed 24-bit BMP file.
 *
 * For each pixel, bits 16-23, 8-15 and 0-7 are written in that order, and
 * every scanline is zero-padded to a multiple of 4 bytes. Failure to open the
 * file or to allocate the staging buffer trips an assert. */
void cl_write_bmp(const int *data, int width, int height, const char *filename)
{
  FILE *fp = fopen(filename, "wb");
  assert(fp);

  const int row_bytes = width * 3;
  const int scanline = (row_bytes + 3) & ~3;   /* row size rounded up to a dword */
  const int pad = scanline - row_bytes;

  char *raw = (char *) malloc((size_t) scanline * height);
  assert(raw);
  char *p = raw;

  for (int y = 0; y < height; y++) {
    for (int x = 0; x < width; x++) {
      int c = *data++;
      *p++ = ((c >> 16) & 0xff);
      *p++ = ((c >> 8) & 0xff);
      *p++ = ((c >> 0) & 0xff);
    }
    for (int k = 0; k < pad; k++)
      *p++ = 0;
  }
  int sizeraw = (int) (p - raw);
  assert(sizeraw == scanline * height);

  struct bmphdr hdr;

  hdr.filesize = (int) (scanline * height + sizeof(hdr) + 2);
  hdr.as0 = 0;
  hdr.as1 = 0;
  hdr.bmpoffset = (int) (sizeof(hdr) + 2);
  hdr.headerbytes = 40;
  hdr.width = width;
  hdr.height = height;
  hdr.nplanes = 1;
  hdr.bpp = 24;
  hdr.compression = 0;
  hdr.sizeraw = sizeraw;
  hdr.hres = 0;  // 2834;
  hdr.vres = 0;  // 2834;
  hdr.npalcolors = 0;
  hdr.nimportant = 0;

  /* Now write bmp file */
  char magic[2] = { 'B', 'M' };
  fwrite(&magic[0], 1, 2, fp);
  fwrite(&hdr, 1, sizeof(hdr), fp);
  fwrite(raw, 1, hdr.sizeraw, fp);

  fclose(fp);
  free(raw);
}

static const float pixel_threshold = 0.05f;
static const float max_error_ratio = 0.001f;

/* Compare img (w x h pixels) with the reference BMP named bmp.
 *
 * A pixel counts as a discrepancy when the norm of its per-channel relative
 * errors exceeds pixel_threshold. Returns 1 when the share of such pixels is
 * at most max_error_ratio, and 0 otherwise or when the reference has a
 * different size. */
int cl_check_image(const int *img, int w, int h, const char *bmp)
{
  int refw, refh;
  int *ref = cl_read_bmp(bmp, &refw, &refh);
  if (ref == NULL || refw != w || refh != h) {
    free(ref);   /* do not leak a reference image of the wrong size */
    return 0;
  }
  const int n = w*h;
  int discrepancy = 0;
  for (int i = 0; i < n; ++i) {
    const float r = (float) (img[i] & 0xff);
    const float g = (float) ((img[i] >> 8) & 0xff);
    const float b = (float) ((img[i] >> 16) & 0xff);
    const float rr = (float) (ref[i] & 0xff);
    const float rg = (float) ((ref[i] >> 8) & 0xff);
    const float rb = (float) ((ref[i] >> 16) & 0xff);
    const float dr = fabs(r-rr) / (1.f/255.f + std::max(r,rr));
    const float dg = fabs(g-rg) / (1.f/255.f + std::max(g,rg));
    const float db = fabs(b-rb) / (1.f/255.f + std::max(b,rb));
    const float err = sqrtf(dr*dr+dg*dg+db*db);
    if (err > pixel_threshold) discrepancy++;
  }
  free(ref);
  return (float(discrepancy) / float(n) > max_error_ratio) ? 0 : 1;
}

/* Bit-field view of a 32-bit float: mantissa, exponent, sign. */
typedef struct
{
  unsigned int mantissa:23;
  unsigned int exponent:8;
  unsigned int sign:1;
} FLOAT;

/* The same 32 bits seen as a float, an unsigned integer, or split fields. */
typedef union
{
  float f;
  unsigned int i;
  FLOAT spliter;
} SF;

/* Return the gap between two positive floats that share float_number's
 * exponent and whose mantissas are 1 and 0, i.e. the weight of the lowest
 * mantissa bit at that exponent. */
const float cl_FLT_ULP(float float_number)
{
  SF floatBin, ulpBin, ulpBinBase;
  floatBin.f = float_number;

  ulpBin.spliter.sign     = ulpBinBase.spliter.sign = 0;
  ulpBin.spliter.exponent = ulpBinBase.spliter.exponent = floatBin.spliter.exponent;
  ulpBin.spliter.mantissa = 0x1;
  ulpBinBase.spliter.mantissa = 0x0;

  return ulpBin.f - ulpBinBase.f;
}

/* Integer counterpart of cl_FLT_ULP; always returns 0. */
const int cl_INT_ULP(int int_number)
{
  return 0;
}