diff options
Diffstat (limited to 'backend')
-rw-r--r-- | backend/src/driver/cl_gen_command_queue.cpp | 14 | ||||
-rw-r--r-- | backend/src/driver/cl_gen_driver.c | 1 | ||||
-rw-r--r-- | backend/src/driver/cl_gen_driver.h | 1 | ||||
-rw-r--r-- | backend/src/driver/cl_gen_kernel.cpp | 4 | ||||
-rw-r--r-- | backend/src/driver/cl_gen_mem.cpp | 10 |
5 files changed, 21 insertions, 9 deletions
diff --git a/backend/src/driver/cl_gen_command_queue.cpp b/backend/src/driver/cl_gen_command_queue.cpp index 1616487e..8d210b51 100644 --- a/backend/src/driver/cl_gen_command_queue.cpp +++ b/backend/src/driver/cl_gen_command_queue.cpp @@ -32,6 +32,9 @@ extern "C" { // for the C header files bool GenGPUWorkItem::setStatus(cl_int status) { + GBE_ASSERT(status < this->state); //Should never go back. + this->state = status; +printf("SSSSSSSSSSSSset state to %d\n", status); if (this->event == NULL) return false; @@ -60,7 +63,7 @@ cl_int GenGPUWorkItem::isReady(void) pthread_mutex_lock(&e->lock); if (e->status > CL_COMPLETE) { pthread_mutex_unlock(&e->lock); - return 1; + return 0; } if (e->status < 0) { pthread_mutex_unlock(&e->lock); @@ -69,12 +72,13 @@ cl_int GenGPUWorkItem::isReady(void) pthread_mutex_unlock(&e->lock); } - return 0; + return 1; } GenGPUWorkItem::GenGPUWorkItem(cl_event event, const cl_event* dependEvents, cl_uint num_events) { - this->state = -1; + this->state = CL_QUEUED; + this->event = NULL; if (event) this->event = event; @@ -105,7 +109,7 @@ static void *workerFunction(void *Arg) /* Get the first available one and execute it. */ for (list<GenGPUWorkItem*>::iterator it = gpuCmdQueue->workItems.begin(); it != gpuCmdQueue->workItems.end(); it++) { - if ((*it)->isReady() == true) { + if ((*it)->isReady()) { readyOne = *it; gpuCmdQueue->workItems.erase(it); break; @@ -118,6 +122,7 @@ static void *workerFunction(void *Arg) /* We execute it without lock. */ gpuCmdQueue->inExec = true; + printf("BBBBBBBBBBBBBBBBBBBBBBBefore do something\n"); pthread_mutex_unlock(&gpuCmdQueue->mutex); if (readyOne->state == CL_QUEUED) { if (readyOne->submit() == true) { @@ -127,6 +132,7 @@ static void *workerFunction(void *Arg) readyOne->setStatus(-1); // Set a negative value to cancel all. } } + printf("00000000000000000000000000000000000\n"); if (readyOne->state == CL_RUNNING || readyOne->state == CL_SUBMITTED) { if (readyOne->complete() == true) { readyOne->setStatus(CL_COMPLETE); diff --git a/backend/src/driver/cl_gen_driver.c b/backend/src/driver/cl_gen_driver.c index c8f962f7..36542910 100644 --- a/backend/src/driver/cl_gen_driver.c +++ b/backend/src/driver/cl_gen_driver.c @@ -36,6 +36,7 @@ _cl_driver clgenDriver = { .get_arg_name = GenGetArgName, .get_arg_type_name = GenGetArgTypeName, .get_arg_info = GenGetArgInfo, + .get_workgroup_info = GenGetWorkgroupInfo, .create_buffer = GenCreateBuffer, .release_mem = GenReleaseMem, .enqueue_map_buffer = GenEnqueueMapBuffer, diff --git a/backend/src/driver/cl_gen_driver.h b/backend/src/driver/cl_gen_driver.h index ba572a42..27e3b188 100644 --- a/backend/src/driver/cl_gen_driver.h +++ b/backend/src/driver/cl_gen_driver.h @@ -50,6 +50,7 @@ cl_int GenGetArgTypeName(cl_kernel kernel, const cl_device_id device, cl_uint in cl_int GenGetArgInfo(cl_kernel kernel, const cl_device_id device, cl_uint index, size_t* size, cl_kernel_arg_type *type, cl_kernel_arg_address_qualifier *qualifier, cl_kernel_arg_access_qualifier *access, cl_kernel_arg_type_qualifier *type_qualifier); +cl_int GenGetWorkgroupInfo(cl_kernel kernel, const cl_device_id device, cl_kernel_workgroup_info wgInfo); cl_int GenCreateBuffer(cl_mem mem, const cl_device_id device); cl_int GenReleaseMem(cl_mem mem, const cl_device_id device); cl_int GenEnqueueMapBuffer(cl_command_queue queue, cl_mem mem, void** ret_addr, cl_bool block, diff --git a/backend/src/driver/cl_gen_kernel.cpp b/backend/src/driver/cl_gen_kernel.cpp index c9ad3a39..cfbb056d 100644 --- a/backend/src/driver/cl_gen_kernel.cpp +++ b/backend/src/driver/cl_gen_kernel.cpp @@ -312,14 +312,18 @@ GenGPUWorkItemNDRange::GenGPUWorkItemNDRange(dri_bufmgr *bufmgr, drm_intel_conte bool GenGPUWorkItemNDRange::submit(void) { bool ret; + printf("!!!!!!!!!!!!!!! before submit\n"); ret = this->gpuState->flush(); + printf("!!!!!!!!!!!!!!! After submit\n"); return ret; } bool GenGPUWorkItemNDRange::complete(void) { /* Wait it to complete. */ + printf("beforeooooooo___________________________ complete\n"); this->gpuState->sync(); + printf("___________________________ complete\n"); return true; } diff --git a/backend/src/driver/cl_gen_mem.cpp b/backend/src/driver/cl_gen_mem.cpp index d5198937..9f40e0df 100644 --- a/backend/src/driver/cl_gen_mem.cpp +++ b/backend/src/driver/cl_gen_mem.cpp @@ -288,15 +288,18 @@ cl_int GenEnqueueUnmapMem(cl_command_queue queue, cl_mem mem, void *mapped_ptr, if (genMem == NULL) return CL_INVALID_VALUE; + /* From here, we want to lock. */ + GenGPULockerHelper mutexAlloc(&genMem->mutex); - // FIXME: Have no choice but to lock mem again. - pthread_mutex_lock(&mem->lock); /* The fake CL_MEM_USE_HOST_PTR case, we need to copy back the data to GPU. */ if ((mem->flags & CL_MEM_USE_HOST_PTR) && genMem->alignedHostPtr == NULL) { int i = 0; size_t mapped_size = 0; size_t mapped_offset = 0; size_t origin[3], region[3]; + + // FIXME: Have no choice but to lock mem again. + pthread_mutex_lock(&mem->lock); for (; i < mem->mapped_ptr_sz; i++) { if (mem->mapped_ptr[i].ptr == mapped_ptr) { mem->mapped_ptr[i].ptr = NULL; @@ -315,9 +318,6 @@ cl_int GenEnqueueUnmapMem(cl_command_queue queue, cl_mem mem, void *mapped_ptr, } pthread_mutex_unlock(&mem->lock); - /* From here, we want to lock. */ - GenGPULockerHelper mutexAlloc(&genMem->mutex); - if (mem->type != CL_MEM_OBJECT_BUFFER) { cl_mem_buffer buffer = cl_mem_to_buffer(mem); GBE_ASSERT((char*)mapped_ptr >= (char*)mem->host_ptr + buffer->sub_offset); |