/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #include "utest_file_map.hpp" #include "utest_helper.hpp" #include "utest_error.h" #include "CL/cl.h" #include "CL/cl_intel.h" #include #include #include #include #include #include #define FATAL(...) \ do { \ fprintf(stderr, "error: "); \ fprintf(stderr, __VA_ARGS__); \ fprintf(stderr, "\n");\ assert(0); \ exit(-1); \ } while (0) #define FATAL_IF(COND, ...) \ do { \ if (COND) FATAL(__VA_ARGS__); \ } while (0) cl_platform_id platform = NULL; cl_device_id device = NULL; cl_context ctx = NULL; __thread cl_program program = NULL; __thread cl_kernel kernel = NULL; cl_command_queue queue = NULL; __thread cl_mem buf[MAX_BUFFER_N] = {}; __thread void *buf_data[MAX_BUFFER_N] = {}; __thread size_t globals[3] = {}; __thread size_t locals[3] = {}; float ULPSIZE_FAST_MATH = 10000.; __attribute__ ((visibility ("internal"))) clGetKernelSubGroupInfoKHR_cb* utestclGetKernelSubGroupInfoKHR = NULL; #ifdef HAS_GL_EGL_X11 Display *xDisplay; EGLDisplay eglDisplay; EGLContext eglContext = NULL; EGLSurface eglSurface; Window xWindow; void cl_ocl_destroy_egl_window() { eglMakeCurrent(eglDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); eglDestroyContext(eglDisplay, eglContext); eglDestroySurface(eglDisplay, eglSurface); XDestroyWindow(xDisplay, xWindow); XCloseDisplay(xDisplay); } bool init_egl_window(int width, int height) { XSetWindowAttributes swa; Window win, root; EGLint attr[] = { // some attributes to set up our egl-interface EGL_BUFFER_SIZE, 16, EGL_RENDERABLE_TYPE, EGL_OPENGL_BIT, EGL_NONE }; //// egl-contexts collect all state descriptions needed required for operation EGLint ctxattr[] = { #if 0 EGL_CONTEXT_CLIENT_VERSION, 2, #endif EGL_NONE }; EGLConfig ecfg; EGLint numConfig; eglContext = EGL_NO_CONTEXT; xDisplay = XOpenDisplay(NULL); if (xDisplay == NULL) { fprintf(stderr, "Failed to open DISPLAY.\n"); return false; } root = DefaultRootWindow(xDisplay); swa.event_mask = ExposureMask | PointerMotionMask | KeyPressMask; win = XCreateWindow( xDisplay, root, 0, 0, width, height, 0, CopyFromParent, InputOutput, CopyFromParent, CWEventMask, &swa); xWindow = win; /////// the egl part ////////////////////////////////////////////////////////////////// // egl provides an interface to connect the graphics related functionality of openGL ES // with the windowing interface and functionality of the native operation system (X11 // in our case. eglDisplay = eglGetDisplay( (EGLNativeDisplayType) xDisplay ); if ( eglDisplay == EGL_NO_DISPLAY ) { fprintf(stderr, "Got no EGL display.\n"); return false; } eglBindAPI(EGL_OPENGL_API); int m,n; if ( !eglInitialize( eglDisplay, &m, &n ) ) { fprintf(stderr, "Unable to initialize EGL\n"); return false; } if ( !eglChooseConfig( eglDisplay, attr, &ecfg, 1, &numConfig ) ) { fprintf(stderr, "Failed to choose config (eglError: %d)\n", eglGetError()); return false; } if ( numConfig != 1 ) { fprintf(stderr, "Didn't get exactly one config, but %d", numConfig); return false; } eglSurface = eglCreateWindowSurface ( eglDisplay, ecfg, win, NULL ); if ( eglSurface == EGL_NO_SURFACE ) { fprintf(stderr, "Unable to create EGL surface (eglError: %d)\n", eglGetError()); return false; } eglContext = eglCreateContext ( eglDisplay, ecfg, EGL_NO_CONTEXT, ctxattr ); if ( eglContext == EGL_NO_CONTEXT ) { fprintf(stderr, "Unable to create EGL context (eglError: %d)\n", eglGetError()); return false; } //// associate the egl-context with the egl-surface eglMakeCurrent( eglDisplay, eglSurface, eglSurface, eglContext); glClearColor(1.0, 1.0, 1.0, 1.0); glClear(GL_COLOR_BUFFER_BIT); glFinish(); eglSwapBuffers(eglDisplay, eglSurface); return true; } #endif static const char* cl_test_channel_order_string(cl_channel_order order) { switch(order) { #define DECL_ORDER(WHICH) case CL_##WHICH: return "CL_"#WHICH DECL_ORDER(R); DECL_ORDER(A); DECL_ORDER(RG); DECL_ORDER(RA); DECL_ORDER(RGB); DECL_ORDER(RGBA); DECL_ORDER(BGRA); DECL_ORDER(ARGB); DECL_ORDER(INTENSITY); DECL_ORDER(LUMINANCE); DECL_ORDER(Rx); DECL_ORDER(RGx); DECL_ORDER(RGBx); DECL_ORDER(sRGBA); DECL_ORDER(sBGRA); #undef DECL_ORDER default: return "Unsupported image channel order"; }; } static const char* cl_test_channel_type_string(cl_channel_type type) { switch(type) { #define DECL_TYPE(WHICH) case CL_##WHICH: return "CL_"#WHICH DECL_TYPE(SNORM_INT8); DECL_TYPE(SNORM_INT16); DECL_TYPE(UNORM_INT8); DECL_TYPE(UNORM_INT16); DECL_TYPE(UNORM_SHORT_565); DECL_TYPE(UNORM_SHORT_555); DECL_TYPE(UNORM_INT_101010); DECL_TYPE(SIGNED_INT8); DECL_TYPE(SIGNED_INT16); DECL_TYPE(SIGNED_INT32); DECL_TYPE(UNSIGNED_INT8); DECL_TYPE(UNSIGNED_INT16); DECL_TYPE(UNSIGNED_INT32); DECL_TYPE(HALF_FLOAT); DECL_TYPE(FLOAT); #undef DECL_TYPE default: return "Unsupported image channel type"; }; } static void clpanic(const char *msg, int rval) { printf("Failed: %s (%d)\n", msg, rval); exit(-1); } char* cl_do_kiss_path(const char *file, cl_device_id device) { const char *sub_path = NULL; char *ker_path = NULL; const char *kiss_path = getenv("OCL_KERNEL_PATH"); size_t sz = strlen(file); sub_path = ""; if (kiss_path == NULL) clpanic("set OCL_KERNEL_PATH. This is where the kiss kernels are", -1); sz += strlen(kiss_path) + strlen(sub_path) + 2; /* +1 for end of string, +1 for '/' */ if ((ker_path = (char*) malloc(sz)) == NULL) clpanic("Allocation failed", -1); sprintf(ker_path, "%s/%s%s", kiss_path, sub_path, file); return ker_path; } int cl_kernel_init(const char *file_name, const char *kernel_name, int format, const char * build_opt) { cl_file_map_t *fm = NULL; char *ker_path = NULL; cl_int status = CL_SUCCESS; static const char *prevFileName = NULL; /* Load the program and build it */ if (!program || (program && (!prevFileName || strcmp(prevFileName, file_name)))) { if (program) clReleaseProgram(program); ker_path = cl_do_kiss_path(file_name, device); if (format == LLVM) { assert(0); } else if (format == SOURCE) { cl_file_map_t *fm = cl_file_map_new(); if(!fm) { fprintf(stderr, "run out of memory\n"); goto error; } FATAL_IF (cl_file_map_open(fm, ker_path) != CL_FILE_MAP_SUCCESS, "Failed to open file \"%s\" with kernel \"%s\". Did you properly set OCL_KERNEL_PATH variable?", file_name, kernel_name); const char *src = cl_file_map_begin(fm); const size_t sz = cl_file_map_size(fm); program = clCreateProgramWithSource(ctx, 1, &src, &sz, &status); cl_file_map_delete(fm); } else FATAL("Not able to create program from binary"); if (status != CL_SUCCESS) { fprintf(stderr, "error calling clCreateProgramWithBinary\n"); goto error; } prevFileName = file_name; /* OCL requires to build the program even if it is created from a binary */ OCL_CALL (clBuildProgram, program, 1, &device, build_opt, NULL, NULL); } /* Create a kernel from the program */ if (kernel) clReleaseKernel(kernel); kernel = clCreateKernel(program, kernel_name, &status); if (status != CL_SUCCESS) { fprintf(stderr, "error calling clCreateKernel\n"); goto error; } exit: free(ker_path); cl_file_map_delete(fm); return status; error: prevFileName = NULL; goto exit; } int cl_kernel_compile(const char *file_name, const char *kernel_name, const char * compile_opt) { cl_file_map_t *fm = NULL; char *ker_path = NULL; cl_int status = CL_SUCCESS; static const char *prevFileName = NULL; /* Load the program and build it */ if (!program || (program && (!prevFileName || strcmp(prevFileName, file_name)))) { if (program) clReleaseProgram(program); ker_path = cl_do_kiss_path(file_name, device); cl_file_map_t *fm = cl_file_map_new(); if(!fm) { fprintf(stderr, "run out of memory\n"); goto error; } FATAL_IF (cl_file_map_open(fm, ker_path) != CL_FILE_MAP_SUCCESS, "Failed to open file \"%s\" with kernel \"%s\". Did you properly set OCL_KERNEL_PATH variable?", file_name, kernel_name); const char *src = cl_file_map_begin(fm); const size_t sz = cl_file_map_size(fm); program = clCreateProgramWithSource(ctx, 1, &src, &sz, &status); cl_file_map_delete(fm); if (status != CL_SUCCESS) { fprintf(stderr, "error calling clCreateProgramWithSource\n"); goto error; } prevFileName = file_name; OCL_CALL (clCompileProgram, program, 1, &device, // num_devices & device_list compile_opt, // compile_options 0, // num_input_headers NULL, NULL, NULL, NULL); OCL_ASSERT(status == CL_SUCCESS); } exit: free(ker_path); cl_file_map_delete(fm); return status; error: prevFileName = NULL; goto exit; } int cl_kernel_link(const char *file_name, const char *kernel_name, const char * link_opt) { cl_file_map_t *fm = NULL; char *ker_path = NULL; cl_int status = CL_SUCCESS; static const char *prevFileName = NULL; /* Load the program and build it */ if (!program || (program && (!prevFileName || strcmp(prevFileName, file_name)))) { if (program) clReleaseProgram(program); ker_path = cl_do_kiss_path(file_name, device); cl_file_map_t *fm = cl_file_map_new(); if(!fm) { fprintf(stderr, "run out of memory\n"); goto error; } FATAL_IF (cl_file_map_open(fm, ker_path) != CL_FILE_MAP_SUCCESS, "Failed to open file \"%s\" with kernel \"%s\". Did you properly set OCL_KERNEL_PATH variable?", file_name, kernel_name); const char *src = cl_file_map_begin(fm); const size_t sz = cl_file_map_size(fm); program = clCreateProgramWithSource(ctx, 1, &src, &sz, &status); cl_file_map_delete(fm); if (status != CL_SUCCESS) { fprintf(stderr, "error calling clCreateProgramWithSource\n"); goto error; } prevFileName = file_name; OCL_CALL (clCompileProgram, program, 1, &device, // num_devices & device_list NULL, // compile_options 0, // num_input_headers NULL, NULL, NULL, NULL); OCL_ASSERT(status==CL_SUCCESS); cl_program input_programs[1] = {program}; program = clLinkProgram(ctx, 1, &device, link_opt, 1, input_programs, NULL, NULL, &status); OCL_ASSERT(program != NULL); OCL_ASSERT(status == CL_SUCCESS); clReleaseProgram(input_programs[0]); } /* Create a kernel from the program */ if (kernel) clReleaseKernel(kernel); kernel = clCreateKernel(program, kernel_name, &status); if (status != CL_SUCCESS) { fprintf(stderr, "error calling clCreateKernel\n"); goto error; } exit: free(ker_path); cl_file_map_delete(fm); return status; error: prevFileName = NULL; goto exit; } #define GET_PLATFORM_STR_INFO(LOWER_NAME, NAME) \ { \ size_t param_value_size; \ OCL_CALL (clGetPlatformInfo, platform, CL_PLATFORM_##NAME, 0, 0, ¶m_value_size); \ std::vector param_value(param_value_size); \ OCL_CALL (clGetPlatformInfo, platform, CL_PLATFORM_##NAME, \ param_value_size, param_value.empty() ? NULL : ¶m_value.front(), \ ¶m_value_size); \ std::string str; \ if (!param_value.empty()) \ str = std::string(¶m_value.front(), param_value_size-1); \ printf("platform_" #LOWER_NAME " \"%s\"\n", str.c_str()); \ } #include #define GET_DEVICE_STR_INFO(LOWER_NAME, NAME) \ std::string LOWER_NAME ##Str; \ OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_##NAME, 0, 0, ¶m_value_size); \ { \ std::vector param_value(param_value_size); \ OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_##NAME, \ param_value_size, param_value.empty() ? NULL : ¶m_value.front(), \ ¶m_value_size); \ if (!param_value.empty()) \ LOWER_NAME ##Str = std::string(¶m_value.front(), param_value_size-1); \ } \ printf("device_" #LOWER_NAME " \"%s\"\n", LOWER_NAME ##Str.c_str()); int cl_ocl_init(void) { cl_int status = CL_SUCCESS; cl_uint platform_n; size_t i; #ifdef HAS_GL_EGL_X11 bool hasGLExt = false; #endif cl_context_properties *props = NULL; /* Get the platform number */ OCL_CALL (clGetPlatformIDs, 0, NULL, &platform_n); printf("platform number %u\n", platform_n); assert(platform_n >= 1); /* Get a valid platform */ OCL_CALL (clGetPlatformIDs, 1, &platform, &platform_n); GET_PLATFORM_STR_INFO(profile, PROFILE); GET_PLATFORM_STR_INFO(name, NAME); GET_PLATFORM_STR_INFO(vendor, VENDOR); GET_PLATFORM_STR_INFO(version, VERSION); GET_PLATFORM_STR_INFO(extensions, EXTENSIONS); /* Get the device (only GPU device is supported right now) */ try { OCL_CALL (clGetDeviceIDs, platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); { size_t param_value_size; GET_DEVICE_STR_INFO(profile, PROFILE); GET_DEVICE_STR_INFO(name, NAME); GET_DEVICE_STR_INFO(vendor, VENDOR); GET_DEVICE_STR_INFO(version, VERSION); GET_DEVICE_STR_INFO(extensions, EXTENSIONS); GET_DEVICE_STR_INFO(opencl_c_version, OPENCL_C_VERSION); #ifdef HAS_GL_EGL_X11 if (std::strstr(extensionsStr.c_str(), "cl_khr_gl_sharing")) { hasGLExt = true; } #endif } } catch (...) { fprintf(stderr, "error calling clGetDeviceIDs\n"); status = CL_DEVICE_NOT_FOUND; goto error; } #ifdef HAS_GL_EGL_X11 if (hasGLExt) { int i = 0; props = new cl_context_properties[7]; props[i++] = CL_CONTEXT_PLATFORM; props[i++] = (cl_context_properties)platform; if (init_egl_window(EGL_WINDOW_WIDTH, EGL_WINDOW_HEIGHT)) { props[i++] = CL_EGL_DISPLAY_KHR; props[i++] = (cl_context_properties)eglGetCurrentDisplay(); props[i++] = CL_GL_CONTEXT_KHR; props[i++] = (cl_context_properties)eglGetCurrentContext(); } props[i++] = 0; } #endif /* Now create a context */ ctx = clCreateContext(props, 1, &device, NULL, NULL, &status); if (status != CL_SUCCESS) { fprintf(stderr, "error calling clCreateContext\n"); goto error; } /* All image types currently supported by the context */ cl_image_format fmt[256]; cl_uint fmt_n; clGetSupportedImageFormats(ctx, 0, CL_MEM_OBJECT_IMAGE2D, 256, fmt, &fmt_n); printf("%u image formats are supported\n", fmt_n); for (i = 0; i < fmt_n; ++i) printf("[%s %s]\n", cl_test_channel_order_string(fmt[i].image_channel_order), cl_test_channel_type_string(fmt[i].image_channel_data_type)); /* We are going to push NDRange kernels here */ queue = clCreateCommandQueue(ctx, device, 0, &status); if (status != CL_SUCCESS) { fprintf(stderr, "error calling clCreateCommandQueue\n"); goto error; } error: if (props) delete[] props; return status; } int cl_test_init(const char *file_name, const char *kernel_name, int format) { cl_int status = CL_SUCCESS; /* Initialize OCL */ if ((status = cl_ocl_init()) != CL_SUCCESS) goto error; /* Load the kernel */ if ((status = cl_kernel_init(file_name, kernel_name, format, NULL)) != CL_SUCCESS) goto error; error: return status; } void cl_kernel_destroy(bool needDestroyProgram) { if (kernel) { clReleaseKernel(kernel); kernel = NULL; } if (needDestroyProgram && program) { clReleaseProgram(program); program = NULL; } } void cl_ocl_destroy(void) { clReleaseCommandQueue(queue); clReleaseContext(ctx); #ifdef HAS_GL_EGL_X11 if (eglContext != NULL) { cl_ocl_destroy_egl_window(); eglContext = NULL; } #endif } void cl_test_destroy(void) { cl_kernel_destroy(); cl_ocl_destroy(); } void cl_buffer_destroy(void) { int i; for (i = 0; i < MAX_BUFFER_N; ++i) { if (buf_data[i] != NULL) { clEnqueueUnmapMemObject(queue, buf[i], buf_data[i], 0, NULL, NULL); buf_data[i] = NULL; } if (buf[i] != NULL) { clReleaseMemObject(buf[i]); buf[i] = NULL; } } } void cl_report_perf_counters(cl_mem perf) { cl_int status = CL_SUCCESS; uint32_t *start = NULL, *end = NULL; uint32_t i; if (perf == NULL) return; start = (uint32_t*)clEnqueueMapBuffer(queue, perf, CL_TRUE, CL_MAP_READ, 0, 128 * sizeof(uint32_t)/*size*/, 0, NULL, NULL, &status); assert(status == CL_SUCCESS && start != NULL); end = start + 128; printf("BEFORE\n"); for (i = 0; i < 6*8; ++i) { if (i % 8 == 0) printf("\n"); printf("[%3u 0x%8x] ", i, start[i]); } printf("\n\n"); printf("AFTER\n"); for (i = 0; i < 6*8; ++i) { if (i % 8 == 0) printf("\n"); printf("[%3u 0x%8x] ", i, end[i]); } printf("\n\n"); printf("DIFF\n"); for (i = 0; i < 6*8; ++i) { if (i % 8 == 0) printf("\n"); printf("[%3u %8i] ", i, end[i] - start[i]); } printf("\n\n"); clEnqueueUnmapMemObject(queue, perf, start, 0, NULL, NULL); } struct bmphdr { // 2 bytes of magic here, "BM", total header size is 54 bytes! int filesize; // 4 total file size incl header short as0, as1; // 8 app specific int bmpoffset; // 12 ofset of bmp data int headerbytes; // 16 bytes in header from this point (40 actually) int width; // 20 int height; // 24 short nplanes; // 26 no of color planes short bpp; // 28 bits/pixel int compression; // 32 BI_RGB = 0 = no compression int sizeraw; // 36 size of raw bmp file, excluding header, incl padding int hres; // 40 horz resolutions pixels/meter int vres; // 44 int npalcolors; // 48 No of colors in palette int nimportant; // 52 No of important colors // raw b, g, r data here, dword aligned per scan line }; int *cl_read_bmp(const char *filename, int *width, int *height) { struct bmphdr hdr; char *bmppath = cl_do_kiss_path(filename, device); FILE *fp = fopen(bmppath, "rb"); assert(fp); char magic[2]; int ret; ret = fread(&magic[0], 1, 2, fp); if(2 != ret){ fclose(fp); free(bmppath); return NULL; } assert(magic[0] == 'B' && magic[1] == 'M'); ret = fread(&hdr, sizeof(hdr), 1, fp); if(1 != ret){ fclose(fp); free(bmppath); return NULL; } assert(hdr.width > 0 && hdr.height > 0 && hdr.nplanes == 1 && hdr.compression == 0); int *rgb32 = (int *) malloc(hdr.width * hdr.height * sizeof(int)); assert(rgb32); int x, y; int *dst = rgb32; for (y = 0; y < hdr.height; y++) { for (x = 0; x < hdr.width; x++) { assert(!feof(fp)); int b = (getc(fp) & 0x0ff); int g = (getc(fp) & 0x0ff); int r = (getc(fp) & 0x0ff); *dst++ = (r | (g << 8) | (b << 16) | 0xff000000); /* abgr */ } while (x & 3) { getc(fp); x++; } // each scanline padded to dword // printf("read row %d\n", y); // fflush(stdout); } fclose(fp); *width = hdr.width; *height = hdr.height; free(bmppath); return rgb32; } void cl_write_bmp(const int *data, int width, int height, const char *filename) { int x, y; FILE *fp = NULL; #if defined(__ANDROID__) char dst_img[256]; snprintf(dst_img, sizeof(dst_img), "/sdcard/ocl/%s", filename); fp = fopen(dst_img, "wb"); if(fp == NULL) return; #else fp = fopen(filename, "wb"); #endif assert(fp); char *raw = (char *) malloc(width * height * sizeof(int)); // at most assert(raw); char *p = raw; for (y = 0; y < height; y++) { for (x = 0; x < width; x++) { int c = *data++; *p++ = ((c >> 16) & 0xff); *p++ = ((c >> 8) & 0xff); *p++ = ((c >> 0) & 0xff); } while (x & 3) { *p++ = 0; x++; } // pad to dword } int sizeraw = p - raw; int scanline = (width * 3 + 3) & ~3; assert(sizeraw == scanline * height); struct bmphdr hdr; hdr.filesize = scanline * height + sizeof(hdr) + 2; hdr.as0 = 0; hdr.as1 = 0; hdr.bmpoffset = sizeof(hdr) + 2; hdr.headerbytes = 40; hdr.width = width; hdr.height = height; hdr.nplanes = 1; hdr.bpp = 24; hdr.compression = 0; hdr.sizeraw = sizeraw; hdr.hres = 0; // 2834; hdr.vres = 0; // 2834; hdr.npalcolors = 0; hdr.nimportant = 0; /* Now write bmp file */ char magic[2] = { 'B', 'M' }; fwrite(&magic[0], 1, 2, fp); fwrite(&hdr, 1, sizeof(hdr), fp); fwrite(raw, 1, hdr.sizeraw, fp); fclose(fp); free(raw); } static const float pixel_threshold = 0.05f; static const float max_error_ratio = 0.001f; int cl_check_image(const int *img, int w, int h, const char *bmp) { int refw, refh; int *ref = cl_read_bmp(bmp, &refw, &refh); if (ref == NULL || refw != w || refh != h) return 0; const int n = w*h; int discrepancy = 0; for (int i = 0; i < n; ++i) { const float r = (float) (img[i] & 0xff); const float g = (float) ((img[i] >> 8) & 0xff); const float b = (float) ((img[i] >> 16) & 0xff); const float rr = (float) (ref[i] & 0xff); const float rg = (float) ((ref[i] >> 8) & 0xff); const float rb = (float) ((ref[i] >> 16) & 0xff); const float dr = fabs(r-rr) / (1.f/255.f + std::max(r,rr)); const float dg = fabs(g-rg) / (1.f/255.f + std::max(g,rg)); const float db = fabs(b-rb) / (1.f/255.f + std::max(b,rb)); const float err = sqrtf(dr*dr+dg*dg+db*db); if (err > pixel_threshold) discrepancy++; } free(ref); return (float(discrepancy) / float(n) > max_error_ratio) ? 0 : 1; } float cl_FLT_ULP(float float_number) { SF floatBin, ulpBin, ulpBinBase; floatBin.f = float_number; ulpBin.spliter.sign = ulpBinBase.spliter.sign = 0; ulpBin.spliter.exponent = ulpBinBase.spliter.exponent = floatBin.spliter.exponent; ulpBin.spliter.mantissa = 0x1; ulpBinBase.spliter.mantissa = 0x0; return ulpBin.f - ulpBinBase.f; } int cl_INT_ULP(int int_number) { return 0; } double time_subtract(struct timeval *y, struct timeval *x, struct timeval *result) { if ( x->tv_sec > y->tv_sec ) return -1; if ((x->tv_sec == y->tv_sec) && (x->tv_usec > y->tv_usec)) return -1; if ( result != NULL){ result->tv_sec = ( y->tv_sec - x->tv_sec ); result->tv_usec = ( y->tv_usec - x->tv_usec ); if (result->tv_usec < 0){ result->tv_sec --; result->tv_usec += 1000000; } } double msec = 1000.0*(y->tv_sec - x->tv_sec) + (y->tv_usec - x->tv_usec)/1000.0; return msec; } float select_ulpsize(float ULPSIZE_FAST_MATH, float ULPSIZE_NO_FAST_MATH) { const char* env_strict = getenv("OCL_STRICT_CONFORMANCE"); float ULPSIZE_FACTOR = ULPSIZE_NO_FAST_MATH; if (env_strict != NULL && strcmp(env_strict, "0") == 0 ) ULPSIZE_FACTOR = ULPSIZE_FAST_MATH; return ULPSIZE_FACTOR; } int cl_check_double(void) { std::string extStr; size_t param_value_size; OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, 0, 0, ¶m_value_size); std::vector param_value(param_value_size); OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, param_value_size, param_value.empty() ? NULL : ¶m_value.front(), ¶m_value_size); if (!param_value.empty()) extStr = std::string(¶m_value.front(), param_value_size-1); if (std::strstr(extStr.c_str(), "cl_khr_fp64") == NULL) { printf("No cl_khr_fp64, Skip!"); return 0; } return 1; } int cl_check_beignet(void) { size_t param_value_size; size_t ret_sz; OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_VERSION, 0, 0, ¶m_value_size); if(param_value_size == 0) { return 0; } char* device_version_str = (char* )malloc(param_value_size * sizeof(char) ); OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_VERSION, param_value_size, (void*)device_version_str, &ret_sz); OCL_ASSERT(ret_sz == param_value_size); if(!strstr(device_version_str, "beignet")) { free(device_version_str); return 0; } free(device_version_str); return 1; } int cl_check_motion_estimation(void) { std::string extStr; size_t param_value_size; OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, 0, 0, ¶m_value_size); std::vector param_value(param_value_size); OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, param_value_size, param_value.empty() ? NULL : ¶m_value.front(), ¶m_value_size); if (!param_value.empty()) extStr = std::string(¶m_value.front(), param_value_size-1); if (std::strstr(extStr.c_str(), "cl_intel_motion_estimation") == NULL) { printf("No cl_intel_motion_estimation, Skip!"); return 0; } return 1; } int cl_check_subgroups(void) { std::string extStr; size_t param_value_size; OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, 0, 0, ¶m_value_size); std::vector param_value(param_value_size); OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, param_value_size, param_value.empty() ? NULL : ¶m_value.front(), ¶m_value_size); if (!param_value.empty()) extStr = std::string(¶m_value.front(), param_value_size-1); if (std::strstr(extStr.c_str(), "cl_intel_subgroups") == NULL) { printf("No cl_intel_subgroups, Skip!"); return 0; } if(utestclGetKernelSubGroupInfoKHR == NULL) utestclGetKernelSubGroupInfoKHR = (clGetKernelSubGroupInfoKHR_cb*) clGetExtensionFunctionAddressForPlatform(platform,"clGetKernelSubGroupInfoKHR"); if(utestclGetKernelSubGroupInfoKHR == NULL) { printf("Can't find clGetKernelSubGroupInfoKHR"); OCL_ASSERT(0); } return 1; } int cl_check_subgroups_short(void) { if (!cl_check_subgroups()) return 0; std::string extStr; size_t param_value_size; OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, 0, 0, ¶m_value_size); std::vector param_value(param_value_size); OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, param_value_size, param_value.empty() ? NULL : ¶m_value.front(), ¶m_value_size); if (!param_value.empty()) extStr = std::string(¶m_value.front(), param_value_size-1); if (std::strstr(extStr.c_str(), "cl_intel_subgroups_short") == NULL) { printf("No cl_intel_subgroups_short, Skip!"); return 0; } return 1; } int cl_check_media_block_io(void) { if (!cl_check_subgroups()) return 0; std::string extStr; size_t param_value_size; OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, 0, 0, ¶m_value_size); std::vector param_value(param_value_size); OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, param_value_size, param_value.empty() ? NULL : ¶m_value.front(), ¶m_value_size); if (!param_value.empty()) extStr = std::string(¶m_value.front(), param_value_size-1); if (std::strstr(extStr.c_str(), "cl_intel_media_block_io") == NULL) { printf("No cl_intel_media_block_io, Skip!"); return 0; } return 1; } int cl_check_ocl20(bool or_beignet) { size_t param_value_size; size_t ret_sz; OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_OPENCL_C_VERSION, 0, 0, ¶m_value_size); if(param_value_size == 0) { printf("Not OpenCL 2.0 device, "); if(or_beignet){ if(cl_check_beignet()) { printf("Beignet extension test!"); return 1; } else { printf("Not beignet device , Skip!"); return 0; } }else{ printf("Skip!"); return 0; } } char* device_version_str = (char* )malloc(param_value_size * sizeof(char) ); OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_OPENCL_C_VERSION, param_value_size, (void*)device_version_str, &ret_sz); OCL_ASSERT(ret_sz == param_value_size); if(!strstr(device_version_str, "2.0")) { free(device_version_str); printf("Not OpenCL 2.0 device, "); if(or_beignet){ if(cl_check_beignet()) { printf("Beignet extension test!"); return 1; } else { printf("Not beignet device , Skip!"); return 0; } }else{ printf("Skip!"); return 0; } } free(device_version_str); return 1; } int cl_check_half(void) { std::string extStr; size_t param_value_size; OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, 0, 0, ¶m_value_size); std::vector param_value(param_value_size); OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, param_value_size, param_value.empty() ? NULL : ¶m_value.front(), ¶m_value_size); if (!param_value.empty()) extStr = std::string(¶m_value.front(), param_value_size-1); if (std::strstr(extStr.c_str(), "cl_khr_fp16") == NULL) { printf("No cl_khr_fp16, Skip!"); return 0; } return 1; } uint32_t __half_to_float(uint16_t h, bool *isInf, bool *infSign) { uint32_t out_val = 0; uint16_t sign = (h & 0x8000) >> 15; uint16_t exp = (h & 0x7c00) >> 10; uint16_t fraction = h & 0x03ff; if (isInf) *isInf = false; if (infSign) *infSign = false; if (exp == 0 && fraction == 0) { // (Signed) zero return (sign << 31); } if (exp == 0) { // subnormal mode assert(fraction > 0); exp = -1; do { fraction = fraction << 1; exp++; } while ((fraction & 0x400) == 0); exp = 127 - exp - 15; out_val = (sign << 31) | ((exp & 0xff) << 23) | ((fraction & 0x3ff) << 13); return out_val; } if (exp == 0x1f) { // inf or NAN if (fraction == 0) { // inf out_val = (sign << 31) | (255 << 23); if (isInf) *isInf = true; if (infSign) *infSign = (sign == 0) ? 1 : 0; return out_val; } else { // NAN mode out_val = (sign << 31) | (255 << 23) | 0x7fffff; return out_val; } } // Easy case, just convert. exp = 127 - 15 + exp; out_val = (sign << 31) | ((exp & 0xff) << 23) | ((fraction & 0x3ff) << 13); return out_val; } uint16_t __float_to_half(uint32_t x) { uint16_t sign = (x & 0x80000000) >> 31; uint16_t exp = (x & 0x7F800000) >> 23; uint32_t fraction = (x & 0x7fffff); uint16_t out_val = 0; /* Handle the float NAN format. */ if (exp == 0xFF && fraction != 0) { /* return a NAN half. */ out_val = (sign << 15) | (0x7C00) | (fraction & 0x3ff); return out_val; } /* Float exp is from -126~127, half exp is from -14~15 */ if (exp - 127 > 15) { // Should overflow. /* return +- inf. */ out_val = (sign << 15) | (0x7C00); return out_val; } /* half has 10 bits fraction, so have chance to convet to (-1)^sign X 2^(-14) X 0.fraction form. But if the exp - 127 < -14 - 10, we must have unerflow. */ if (exp < -14 + 127 - 10) { // Should underflow. /* Return zero without subnormal numbers. */ out_val = (sign << 15); return out_val; } if (exp < -14 + 127) { //May underflow, but may use subnormal numbers int shift = -(exp - 127 + 14); assert(shift > 0); assert(shift <= 10); fraction = fraction | 0x0800000; // in 1.significantbits2, add the 1 fraction = fraction >> shift; // To half fraction fraction = (fraction & 0x7ff000) >> 12; out_val = (sign << 15) | ((fraction >> 1) & 0x3ff); if (fraction & 0x01) out_val++; return out_val; } /* Easy case, just convert. */ fraction = (fraction & 0x7ff000) >> 12; exp = exp - 127 + 15; assert(exp > 0); assert(exp < 0x01f); out_val = (sign << 15) | (exp << 10) | ((fraction >> 1) & 0x3ff); if (fraction & 0x01) out_val++; return out_val; } uint32_t as_uint(float f) { union uint32_cast _tmp; _tmp._float = f; return _tmp._uint; } float as_float(uint32_t i) { union uint32_cast _tmp; _tmp._uint = i; return _tmp._float; }