summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYang Rong <rong.r.yang@intel.com>2015-01-29 10:59:13 +0800
committerYang Rong <rong.r.yang@intel.com>2015-01-29 16:14:06 +0800
commit49c7ac32307460f6a00480541f7e2c11f1613b36 (patch)
tree5262f9663ff44e6e47d7276e8eb0207770371ef9
parent1b38728fa476b42ae9b8dc4b2805c9a03eb04484 (diff)
SKL: enable skl device.
Add intel_gpgpu_set_base_address_gen9 for SKL; the other intel_gpgpu functions are the same as for BDW. The SKL backend is currently also the same as BDW's and should derive from GEN8 later. With this commit, some utests pass.
-rw-r--r--backend/src/backend/gen/gen_mesa_disasm.c2
-rw-r--r--backend/src/backend/gen_program.cpp9
-rw-r--r--backend/src/gbe_bin_generater.cpp4
-rw-r--r--src/cl_command_queue.c2
-rw-r--r--src/intel/intel_driver.c4
-rw-r--r--src/intel/intel_gpgpu.c70
6 files changed, 87 insertions, 4 deletions
diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
index 640c0fe2..7465bba2 100644
--- a/backend/src/backend/gen/gen_mesa_disasm.c
+++ b/backend/src/backend/gen/gen_mesa_disasm.c
@@ -1154,6 +1154,8 @@ int gen_disasm (FILE *file, const void *inst, uint32_t deviceID, uint32_t compac
gen_version = 75;
} else if (IS_BROADWELL(deviceID)) {
gen_version = 80;
+ } else if (IS_SKYLAKE(deviceID)) {
+ gen_version = 90;
}
if (PRED_CTRL(inst)) {
diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp
index 2e391e1e..01e7ee91 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -170,6 +170,8 @@ namespace gbe {
ctx = GBE_NEW(Gen75Context, unit, name, deviceID, relaxMath);
} else if (IS_BROADWELL(deviceID)) {
ctx = GBE_NEW(Gen8Context, unit, name, deviceID, relaxMath);
+ } else if (IS_SKYLAKE(deviceID)) {
+ ctx = GBE_NEW(Gen8Context, unit, name, deviceID, relaxMath);
}
GBE_ASSERTM(ctx != NULL, "Fail to create the gen context\n");
@@ -211,7 +213,8 @@ namespace gbe {
(IS_IVYBRIDGE(typeA) && !strcmp(src_hw_info, "BYT")) || \
(IS_BAYTRAIL_T(typeA) && !strcmp(src_hw_info, "BYT")) || \
(IS_HASWELL(typeA) && !strcmp(src_hw_info, "HSW")) || \
- (IS_BROADWELL(typeA) && !strcmp(src_hw_info, "BDW")) )
+ (IS_BROADWELL(typeA) && !strcmp(src_hw_info, "BDW")) || \
+ (IS_SKYLAKE(typeA) && !strcmp(src_hw_info, "SKL")) )
static gbe_program genProgramNewFromBinary(uint32_t deviceID, const char *binary, size_t size) {
using namespace gbe;
@@ -306,6 +309,10 @@ namespace gbe {
src_hw_info[0]='B';
src_hw_info[1]='D';
src_hw_info[2]='W';
+ }else if(IS_SKYLAKE(prog->deviceID)){
+ src_hw_info[0]='S';
+ src_hw_info[1]='K';
+ src_hw_info[2]='L';
}
FILL_DEVICE_ID(*binary, src_hw_info);
memcpy(*binary+BINARY_HEADER_LENGTH, oss.str().c_str(), sz*sizeof(char));
diff --git a/backend/src/gbe_bin_generater.cpp b/backend/src/gbe_bin_generater.cpp
index f4be488d..8d411137 100644
--- a/backend/src/gbe_bin_generater.cpp
+++ b/backend/src/gbe_bin_generater.cpp
@@ -178,6 +178,10 @@ void program_build_instance::serialize_program(void) throw(int)
src_hw_info[0]='B';
src_hw_info[1]='D';
src_hw_info[2]='W';
+ }else if(IS_SKYLAKE(gen_pci_id)){
+ src_hw_info[0]='S';
+ src_hw_info[1]='K';
+ src_hw_info[2]='L';
}
if (str_fmt_out) {
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index f8435485..3c04d6d4 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -207,7 +207,7 @@ cl_command_queue_ND_range(cl_command_queue queue,
/* Check that the user did not forget any argument */
TRY (cl_kernel_check_args, k);
- if (ver == 7 || ver == 75 || ver == 8)
+ if (ver == 7 || ver == 75 || ver == 8 || ver == 9)
TRY (cl_command_queue_ND_range_gen7, queue, k, work_dim, global_wk_off, global_wk_sz, local_wk_sz);
else
FATAL ("Unknown Gen Device");
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
index 21546d98..9e989b6f 100644
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -170,7 +170,9 @@ intel_driver_init(intel_driver_t *driver, int dev_fd)
else
FATAL ("Unsupported Gen for emulation");
#else
- if (IS_GEN8(driver->device_id))
+ if (IS_GEN9(driver->device_id))
+ driver->gen_ver = 9;
+ else if (IS_GEN8(driver->device_id))
driver->gen_ver = 8;
else if (IS_GEN75(driver->device_id))
driver->gen_ver = 75;
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 479077c8..4c095b9c 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -347,6 +347,55 @@ intel_gpgpu_set_base_address_gen8(intel_gpgpu_t *gpgpu)
ADVANCE_BATCH(gpgpu->batch);
}
+static void
+intel_gpgpu_set_base_address_gen9(intel_gpgpu_t *gpgpu)
+{
+ const uint32_t def_cc = cl_gpgpu_get_cache_ctrl(); /* default cache-control value; OR'ed (shifted) into the entries below */
+ BEGIN_BATCH(gpgpu->batch, 19); /* 19 entries follow: the CMD_STATE_BASE_ADDRESS header plus 18 payload entries */
+ OUT_BATCH(gpgpu->batch, CMD_STATE_BASE_ADDRESS | 17);
+ /* Field layout noted by the author: 0, Gen State Mem Obj CC, Stateless Mem Obj CC, Stateless Access Write Back */
+ OUT_BATCH(gpgpu->batch, 0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY); /* General State Base Addr */
+ OUT_BATCH(gpgpu->batch, 0);
+ OUT_BATCH(gpgpu->batch, 0 | (def_cc << 16));
+ /* Surface heap base address (field layout noted by the author: 0, State Mem Obj CC) */
+ /* We use a state base address for the surface heap since IVB clamps the
+ * binding table pointer at 11 bits, so we cannot use pointers directly while
+ * using the surface heap. NOTE(review): rationale is worded for IVB; confirm for Gen9.
+ */
+ assert(gpgpu->aux_offset.surface_heap_offset % 4096 == 0);
+ OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo,
+ I915_GEM_DOMAIN_SAMPLER,
+ I915_GEM_DOMAIN_SAMPLER,
+ gpgpu->aux_offset.surface_heap_offset + (0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY));
+ OUT_BATCH(gpgpu->batch, 0);
+ OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER,
+ (0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY)); /* Dynamic State Base Addr: start of aux_buf (no extra offset) */
+ OUT_BATCH(gpgpu->batch, 0);
+ OUT_BATCH(gpgpu->batch, 0 | (def_cc << 4) | BASE_ADDRESS_MODIFY); /* Indirect Obj Base Addr */
+ OUT_BATCH(gpgpu->batch, 0);
+ /* Instruction Base Addr: emitted as a relocation against the kernel bo (gpgpu->ker->bo), offset 0 */
+ OUT_RELOC(gpgpu->batch, (drm_intel_bo *)gpgpu->ker->bo,
+ I915_GEM_DOMAIN_INSTRUCTION,
+ I915_GEM_DOMAIN_INSTRUCTION,
+ 0 + (0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY));
+ OUT_BATCH(gpgpu->batch, 0);
+
+ OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY); /* first of four entries set to 0xfffff000; presumably the size/upper-bound fields - TODO confirm against the Gen9 layout */
+ /* According to the mesa i965 driver code, we must set the dynamic state access upper bound
+ * to a valid bound value; otherwise the border color pointer may be rejected and you
+ * may get an incorrect border color. This is a known hardware bug. */
+ OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY);
+ OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY);
+ OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY);
+ /* Bindless surface state base address, a zero entry, then 0xfffff000 written without BASE_ADDRESS_MODIFY */
+ OUT_BATCH(gpgpu->batch, (def_cc << 4) | BASE_ADDRESS_MODIFY);
+ OUT_BATCH(gpgpu->batch, 0);
+ OUT_BATCH(gpgpu->batch, 0xfffff000);
+ ADVANCE_BATCH(gpgpu->batch);
+}
+
uint32_t intel_gpgpu_get_scratch_index_gen7(uint32_t size) {
return size / 1024 - 1; /* (size / 1024) - 1 in unsigned arithmetic; wraps around if size < 1024 - presumably callers pass at least 1024, TODO confirm */
}
@@ -1054,7 +1103,8 @@ static uint32_t get_surface_type(intel_gpgpu_t *gpgpu, int index, cl_mem_object_
uint32_t surface_type;
if (((IS_IVYBRIDGE(gpgpu->drv->device_id) ||
IS_HASWELL(gpgpu->drv->device_id) ||
- IS_BROADWELL(gpgpu->drv->device_id))) &&
+ IS_BROADWELL(gpgpu->drv->device_id) ||
+ IS_SKYLAKE(gpgpu->drv->device_id))) &&
index >= BTI_WORKAROUND_IMAGE_OFFSET + BTI_RESERVED_NUM &&
type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
surface_type = I965_SURFACE_2D;
@@ -1960,6 +2010,24 @@ intel_set_gpgpu_callbacks(int device_id)
intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen7;
return;
}
+ if (IS_SKYLAKE(device_id)) {
+ cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen8;
+ intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen8;
+ cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen8;
+ intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen8;
+ intel_gpgpu_post_action = intel_gpgpu_post_action_gen7; //BDW need not restore SLM, same as gen7
+ intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_gen7;
+ intel_gpgpu_set_base_address = intel_gpgpu_set_base_address_gen9;
+ intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen8;
+ intel_gpgpu_load_vfe_state = intel_gpgpu_load_vfe_state_gen8;
+ cl_gpgpu_walker = (cl_gpgpu_walker_cb *)intel_gpgpu_walker_gen8;
+ intel_gpgpu_build_idrt = intel_gpgpu_build_idrt_gen8;
+ intel_gpgpu_load_curbe_buffer = intel_gpgpu_load_curbe_buffer_gen8;
+ intel_gpgpu_load_idrt = intel_gpgpu_load_idrt_gen8;
+ cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler_gen8;
+ intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen7;
+ return;
+ }
intel_gpgpu_set_base_address = intel_gpgpu_set_base_address_gen7;
intel_gpgpu_load_vfe_state = intel_gpgpu_load_vfe_state_gen7;