summaryrefslogtreecommitdiff
path: root/src/cl_command_queue.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/cl_command_queue.c')
-rw-r--r--src/cl_command_queue.c179
1 files changed, 10 insertions, 169 deletions
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 9c1dab39..76170a56 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -103,168 +103,6 @@ cl_command_queue_add_ref(cl_command_queue queue)
atomic_inc(&queue->ref_n);
}
-static void
-cl_kernel_copy_image_parameters(cl_kernel k, cl_mem mem, int index, char *curbe)
-{
- cl_curbe_patch_info_t *info = NULL;
- uint64_t key;
- assert(curbe && mem && mem->is_image);
-
- key = cl_curbe_key(DATA_PARAMETER_IMAGE_WIDTH, index, 0);
- if ((info = cl_kernel_get_curbe_info(k, key)) != NULL)
- memcpy(curbe+info->offsets[0], &mem->w, sizeof(uint32_t));
- key = cl_curbe_key(DATA_PARAMETER_IMAGE_HEIGHT, index, 0);
- if ((info = cl_kernel_get_curbe_info(k, key)) != NULL)
- memcpy(curbe+info->offsets[0], &mem->h, sizeof(uint32_t));
- key = cl_curbe_key(DATA_PARAMETER_IMAGE_DEPTH, index, 0);
- if ((info = cl_kernel_get_curbe_info(k, key)) != NULL)
- memcpy(curbe+info->offsets[0], &mem->depth, sizeof(uint32_t));
- key = cl_curbe_key(DATA_PARAMETER_IMAGE_CHANNEL_DATA_TYPE, index, 0);
- if ((info = cl_kernel_get_curbe_info(k, key)) != NULL)
- memcpy(curbe+info->offsets[0], &mem->fmt.image_channel_data_type, sizeof(uint32_t));
- key = cl_curbe_key(DATA_PARAMETER_IMAGE_CHANNEL_ORDER, index, 0);
- if ((info = cl_kernel_get_curbe_info(k, key)) != NULL)
- memcpy(curbe+info->offsets[0], &mem->fmt.image_channel_order, sizeof(uint32_t));
-}
-
-LOCAL cl_int
-cl_command_queue_bind_surface(cl_command_queue queue,
- cl_kernel k,
- char *curbe,
- drm_intel_bo **local,
- drm_intel_bo **priv,
- drm_intel_bo **scratch,
- uint32_t local_sz)
-{
- cl_context ctx = queue->ctx;
- intel_gpgpu_t *gpgpu = queue->gpgpu;
- drm_intel_bufmgr *bufmgr = cl_context_get_intel_bufmgr(ctx);
- cl_mem mem = NULL;
- drm_intel_bo *bo = NULL, *sync_bo = NULL;
- const size_t max_thread = ctx->device->max_compute_unit;
- cl_int err = CL_SUCCESS;
- uint32_t i, index;
-
- /* Bind user defined surface */
- for (i = 0; i < k->arg_info_n; ++i) {
- assert(k->arg_info[i].offset % SURFACE_SZ == 0);
- index = k->arg_info[i].offset / SURFACE_SZ;
- mem = (cl_mem) k->args[k->arg_info[i].arg_index];
- assert(index != MAX_SURFACES - 1);
- CHECK_MEM(mem);
- bo = mem->bo;
- assert(bo);
- if (mem->is_image) {
- const int32_t w = mem->w, h = mem->h, pitch = mem->pitch;
- const uint32_t fmt = mem->intel_fmt;
- gpgpu_tiling_t tiling = GPGPU_NO_TILE;
- if (mem->tiling == CL_TILE_X)
- tiling = GPGPU_TILE_X;
- else if (mem->tiling == CL_TILE_Y)
- tiling = GPGPU_TILE_Y;
- gpgpu_bind_image2D(gpgpu, index, bo, fmt, w, h, pitch, tiling);
-
- /* Copy the image parameters (width, height) in the constant buffer if the
- * user requests them
- */
- cl_kernel_copy_image_parameters(k, mem, index, curbe);
- } else
- gpgpu_bind_buf(gpgpu, index, bo, cc_llc_l3);
- }
-
- /* Allocate the constant surface (if any) */
- if (k->const_bo) {
- assert(k->const_bo_index != MAX_SURFACES - 1);
- gpgpu_bind_buf(gpgpu, k->const_bo_index,
- k->const_bo,
- cc_llc_l3);
- }
-
- /* Allocate local surface needed for SLM and bind it */
- if (local && local_sz != 0) {
- const size_t sz = 16 * local_sz; /* XXX 16 == maximum barrier number */
- assert(k->patch.local_surf.offset % SURFACE_SZ == 0);
- index = k->patch.local_surf.offset / SURFACE_SZ;
- assert(index != MAX_SURFACES - 1);
- *local = drm_intel_bo_alloc(bufmgr, "CL local surface", sz, 64);
- gpgpu_bind_buf(gpgpu, index, *local, cc_llc_l3);
- }
- else if (local)
- *local = NULL;
-
- /* Allocate private surface and bind it */
- if (priv && k->patch.private_surf.size != 0) {
- const size_t sz = max_thread *
- k->patch.private_surf.size *
- k->patch.exec_env.largest_compiled_simd_sz;
- // assert(k->patch.exec_env.largest_compiled_simd_sz == 16);
- assert(k->patch.private_surf.offset % SURFACE_SZ == 0);
- index = k->patch.private_surf.offset / SURFACE_SZ;
- assert(index != MAX_SURFACES - 1);
- *priv = drm_intel_bo_alloc(bufmgr, "CL private surface", sz, 64);
- gpgpu_bind_buf(gpgpu, index, *priv, cc_llc_l3);
- }
- else if(priv)
- *priv = NULL;
-
- /* Allocate scratch surface and bind it */
- if (scratch && k->patch.scratch.size != 0) {
- const size_t sz = max_thread * /* XXX is it given per lane ??? */
- k->patch.scratch.size *
- k->patch.exec_env.largest_compiled_simd_sz;
- // assert(k->patch.exec_env.largest_compiled_simd_sz == 16);
- assert(k->patch.scratch.offset % SURFACE_SZ == 0);
- assert(index != MAX_SURFACES - 1);
- index = k->patch.scratch.offset / SURFACE_SZ;
- *scratch = drm_intel_bo_alloc(bufmgr, "CL scratch surface", sz, 64);
- gpgpu_bind_buf(gpgpu, index, *scratch, cc_llc_l3);
- }
- else if (scratch)
- *scratch = NULL;
-
- /* Now bind a bo used for synchronization */
- sync_bo = drm_intel_bo_alloc(bufmgr, "sync surface", 64, 64);
- gpgpu_bind_buf(gpgpu, MAX_SURFACES-1, sync_bo, cc_llc_l3);
- if (queue->last_batch != NULL)
- drm_intel_bo_unreference(queue->last_batch);
- queue->last_batch = sync_bo;
-
-error:
- assert(err == CL_SUCCESS); /* Cannot fail here */
- return err;
-}
-
-LOCAL cl_int
-cl_kernel_check_args(cl_kernel k)
-{
- uint32_t i;
- for (i = 0; i < k->arg_n; ++i)
- if (k->is_provided[i] == CL_FALSE)
- return CL_INVALID_KERNEL_ARGS;
- return CL_SUCCESS;
-}
-
-LOCAL cl_int
-cl_command_queue_set_report_buffer(cl_command_queue queue, cl_mem mem)
-{
- cl_int err = CL_SUCCESS;
- if (queue->perf != NULL) {
- cl_mem_delete(queue->perf);
- queue->perf = NULL;
- }
- if (mem != NULL) {
- if (drm_intel_bo_get_size(mem->bo) < 1024) { /* 1K for the performance counters is enough */
- err = CL_INVALID_BUFFER_SIZE;
- goto error;
- }
- cl_mem_add_ref(mem);
- queue->perf = mem;
- }
-
-error:
- return err;
-}
-
#if USE_FULSIM
extern void drm_intel_bufmgr_gem_stop_aubfile(drm_intel_bufmgr*);
extern void drm_intel_bufmgr_gem_set_aubfile(drm_intel_bufmgr*, FILE*);
@@ -303,8 +141,9 @@ static const size_t chunk_sz = 8192u;
static cl_int
cl_fulsim_dump_all_surfaces(cl_command_queue queue, cl_kernel k)
{
- cl_mem mem = NULL;
cl_int err = CL_SUCCESS;
+#if 0
+ cl_mem mem = NULL;
int i;
size_t j;
@@ -323,6 +162,7 @@ cl_fulsim_dump_all_surfaces(cl_command_queue queue, cl_kernel k)
aub_exec_dump_raw_file(mem->bo, chunk_n * chunk_sz, chunk_remainder);
}
error:
+#endif
return err;
}
@@ -345,6 +185,7 @@ struct bmphdr {
/* raw b, g, r data here, dword aligned per scan line */
};
+#if 0
static int*
cl_read_bmp(const char *filename, int *width, int *height)
{
@@ -426,16 +267,17 @@ cl_read_dump(const char *name, size_t *size)
*size = sz;
return dump;
}
+#endif
static cl_int
cl_fulsim_read_all_surfaces(cl_command_queue queue, cl_kernel k)
{
+ cl_int err = CL_SUCCESS;
+#if 0
cl_mem mem = NULL;
char *from = NULL, *to = NULL;
size_t size, j, chunk_n, chunk_remainder;
- cl_int err = CL_SUCCESS;
int i, curr = 0;
-
/* Bind user defined surface */
for (i = 0; i < k->arg_info_n; ++i) {
if (k->arg_info[i].type != OCLRT_ARG_TYPE_BUFFER)
@@ -475,11 +317,12 @@ cl_fulsim_read_all_surfaces(cl_command_queue queue, cl_kernel k)
cl_mem_unmap(mem);
}
error:
+#endif
return err;
+
}
#endif /* USE_FULSIM */
-extern cl_int cl_command_queue_ND_range_gen6(cl_command_queue, cl_kernel, const size_t*, const size_t*, const size_t*);
extern cl_int cl_command_queue_ND_range_gen7(cl_command_queue, cl_kernel, const size_t *, const size_t *, const size_t *);
LOCAL cl_int
@@ -501,9 +344,7 @@ cl_command_queue_ND_range(cl_command_queue queue,
drm_intel_bufmgr_gem_set_aubfile(bufmgr, file);
#endif /* USE_FULSIM */
- if (ver == 6)
- TRY (cl_command_queue_ND_range_gen6, queue, k, global_wk_off, global_wk_sz, local_wk_sz);
- else if (ver == 7 || ver == 75)
+ if (ver == 7 || ver == 75)
TRY (cl_command_queue_ND_range_gen7, queue, k, global_wk_off, global_wk_sz, local_wk_sz);
else
FATAL ("Unknown Gen Device");