summary refs log tree commit diff
path: root/backend/src
diff options
context:
space:
mode:
author Junyan He <junyan.he@intel.com> 2016-03-08 17:52:03 +0800
committer Junyan He <junyan.he@intel.com> 2016-03-08 17:52:03 +0800
commit 888aae548a577efc1b84449ef51c03cf208b57e8 (patch)
tree d8c7122071bce91fbce42337c51150a0486eed1d /backend/src
parent ad2679c7b24107edb37d5cb83d7923ccda8a030c (diff)
d
Diffstat (limited to 'backend/src')
-rw-r--r-- backend/src/driver/cl_gen_device_id.cpp 28
-rw-r--r-- backend/src/driver/cl_gen_gpu_defines.h 40
-rw-r--r-- backend/src/driver/cl_gen_gpu_state.cpp 80
-rw-r--r-- backend/src/driver/cl_gen_gpu_state.h 28
4 files changed, 142 insertions, 34 deletions
diff --git a/backend/src/driver/cl_gen_device_id.cpp b/backend/src/driver/cl_gen_device_id.cpp
index a36c87e4..6eec6e14 100644
--- a/backend/src/driver/cl_gen_device_id.cpp
+++ b/backend/src/driver/cl_gen_device_id.cpp
@@ -59,7 +59,7 @@ static cl_device_id gen_device;
static _cl_extensions gen_device_extensions;
static char gen_ext_string[CL_MAX_EXTENSION_LENGTH];
-struct GpgpuDevice {
+struct GenGPUDevice {
dri_bufmgr *bufmgr;
int fd;
bool from_x11;
@@ -68,19 +68,19 @@ struct GpgpuDevice {
cl_uint max_thread_per_unit;
cl_uint sub_slice_count;
cl_ulong scratch_mem_size;
- GpgpuDevice();
- ~GpgpuDevice();
+ GenGPUDevice();
+ ~GenGPUDevice();
};
-static GpgpuDevice* getGPUDevice(cl_device_id device)
+static GenGPUDevice* getGPUDevice(cl_device_id device) /* returns getGenDevicePrivate(device) viewed as a GenGPUDevice pointer */
{
- return reinterpret_cast<GpgpuDevice*>(getGenDevicePrivate(device));
+ return reinterpret_cast<GenGPUDevice*>(getGenDevicePrivate(device)); /* unchecked cast: assumes the private data really is a GenGPUDevice -- TODO confirm at the call sites */
}
/* just used for maximum relocation number in drm_intel */
#define BATCH_SIZE 0x4000
-static int gpgpuDeviceInit(GpgpuDevice *gpu)
+static int gpgpuDeviceInit(GenGPUDevice *gpu)
{
gpu->bufmgr = drm_intel_bufmgr_gem_init(gpu->fd, BATCH_SIZE);
if (!gpu->bufmgr)
@@ -116,7 +116,7 @@ static int gpgpuDeviceInit(GpgpuDevice *gpu)
return 1;
}
-static int gpgpuDeviceOpenRender(GpgpuDevice *gpu)
+static int gpgpuDeviceOpenRender(GenGPUDevice *gpu)
{
int cardi;
int dev_fd;
@@ -149,7 +149,7 @@ static int gpgpuDeviceOpenRender(GpgpuDevice *gpu)
return 0;
}
-static int gpgpuDeviceOpenMaster(GpgpuDevice *gpu)
+static int gpgpuDeviceOpenMaster(GenGPUDevice *gpu)
{
int cardi;
int dev_fd, ret;
@@ -195,7 +195,7 @@ static int gpgpuDeviceOpenMaster(GpgpuDevice *gpu)
return 0;
}
-static int gpgpuDeviceOpenX11(GpgpuDevice *gpu)
+static int gpgpuDeviceOpenX11(GenGPUDevice *gpu)
{
gpu->fd = dri2OpenX11();
if (gpu->fd >= 0) {
@@ -214,7 +214,7 @@ static int gpgpuDeviceOpenX11(GpgpuDevice *gpu)
return 0;
}
-static void gpgpuDeviceClose(GpgpuDevice *gpu)
+static void gpgpuDeviceClose(GenGPUDevice *gpu)
{
if (gpu->bufmgr)
drm_intel_bufmgr_destroy(gpu->bufmgr);
@@ -231,7 +231,7 @@ static void gpgpuDeviceClose(GpgpuDevice *gpu)
}
}
-GpgpuDevice::GpgpuDevice(void) : bufmgr(NULL), fd(-1), from_x11(0), device_id(0),
+GenGPUDevice::GenGPUDevice(void) : bufmgr(NULL), fd(-1), from_x11(0), device_id(0),
gen_ver(0), max_thread_per_unit(0), sub_slice_count(0), scratch_mem_size(0)
{
if (!gpgpuDeviceOpenX11(this) && !gpgpuDeviceOpenRender(this)
@@ -246,12 +246,12 @@ GpgpuDevice::GpgpuDevice(void) : bufmgr(NULL), fd(-1), from_x11(0), device_id(0)
}
}
-GpgpuDevice::~GpgpuDevice(void)
+GenGPUDevice::~GenGPUDevice(void) /* teardown is delegated entirely to gpgpuDeviceClose(this) */
{
gpgpuDeviceClose(this);
}
-static void initGenDevice(GpgpuDevice* gpu)
+static void initGenDevice(GenGPUDevice* gpu)
{
int device_id = gpu->device_id;
@@ -723,7 +723,7 @@ cl_int GenDriverInit(cl_platform_id platform)
return CL_SUCCESS;
}
- GpgpuDevice* gpuDev = GBE_NEW(GpgpuDevice);
+ GenGPUDevice* gpuDev = GBE_NEW(GenGPUDevice);
if (gpuDev->gen_ver < 7) {
gen_device = NULL;
GBE_FREE(gpuDev);
diff --git a/backend/src/driver/cl_gen_gpu_defines.h b/backend/src/driver/cl_gen_gpu_defines.h
index 0d5b562b..5289e7d2 100644
--- a/backend/src/driver/cl_gen_gpu_defines.h
+++ b/backend/src/driver/cl_gen_gpu_defines.h
@@ -352,5 +352,43 @@
#define GEN_MAX_VME_STATES 8
#define GEN_MAX_IF_DESC 32
-#endif /* __CL_GEN_GPU_DEFINESI_H__ */
+/* gen7 cache control selector. 0x1 and 0x2 are independent bits and
+ * cc_llc_l3 is their union (cc_l3 | cc_llc). */
+typedef enum cl_cache_control {
+  cc_gtt    = 0x0,
+  cc_l3     = 0x1,
+  cc_llc    = 0x2,
+  cc_llc_l3 = 0x3
+} cl_cache_control;
+
+/* gen75 L3 cache control: a single flag in bit 0. */
+typedef enum cl_l3_cache_control {
+  l3cc_uc = 0x0,
+  l3cc_ec = 0x1
+} cl_l3_cache_control;
+
+/* gen75 LLCCC cache control: a two-bit value stored in bits 2:1. */
+typedef enum cl_llccc_cache_control {
+  llccc_pte   = (0x0 << 1),
+  llccc_uc    = (0x1 << 1),
+  llccc_ec    = (0x2 << 1),
+  llccc_ucllc = (0x3 << 1)
+} cl_llccc_cache_control;
+
+/* gen8 target cache control: a two-bit value stored in bits 4:3. */
+typedef enum cl_target_cache_control {
+  tcc_ec_only   = (0x0 << 3),
+  tcc_llc_only  = (0x1 << 3),
+  tcc_llc_ec    = (0x2 << 3),
+  tcc_llc_ec_l3 = (0x3 << 3)
+} cl_target_cache_control;
+
+/* gen8 LLC/ELLC memory type control: a two-bit value stored in bits 6:5. */
+typedef enum cl_mtllc_cache_control {
+  mtllc_pte  = (0x0 << 5),
+  mtllc_none = (0x1 << 5),
+  mtllc_wt   = (0x2 << 5),
+  mtllc_wb   = (0x3 << 5)
+} cl_mtllc_cache_control;
+
+#endif /* __CL_GEN_GPU_DEFINESI_H__ */
diff --git a/backend/src/driver/cl_gen_gpu_state.cpp b/backend/src/driver/cl_gen_gpu_state.cpp
index 692874e4..07c5bbe6 100644
--- a/backend/src/driver/cl_gen_gpu_state.cpp
+++ b/backend/src/driver/cl_gen_gpu_state.cpp
@@ -25,14 +25,14 @@ struct surface_heap {
char surface[256*sizeof(gen_surface_state_t)];
};
-GenGpuState::GenGpuState(dri_bufmgr *bufmgr, drm_intel_context *ctx)
+GenGPUState::GenGPUState(dri_bufmgr *bufmgr, drm_intel_context *ctx) /* zeroes the whole object, then records the buffer manager and context */
{
- memset(this, 0, sizeof(GenGpuState));
+ memset(this, 0, sizeof(GenGPUState)); /* NOTE(review): this class declares virtual functions, so the memset presumably also overwrites the compiler-managed vtable pointer -- confirm this is safe */
this->bufmgr = bufmgr;
this->ctx = ctx;
}
-GenGpuState::~GenGpuState(void)
+GenGPUState::~GenGPUState(void)
{
if(this->time_stamp_b.bo) {
drm_intel_bo_unreference(this->time_stamp_b.bo);
@@ -69,13 +69,13 @@ GenGpuState::~GenGpuState(void)
}
}
-void GenGpuState::sync(void)
+void GenGPUState::sync(void) /* if a batch buffer exists, calls drm_intel_bo_wait_rendering() on its buffer; otherwise does nothing */
{
if (batchbuf)
drm_intel_bo_wait_rendering((drm_intel_bo *)batchbuf->buffer);
}
-void GenGpuState::bindBuf(drm_intel_bo *buf, uint32_t offset,
+void GenGPUState::bindBuf(drm_intel_bo *buf, uint32_t offset,
uint32_t internal_offset, size_t size, uint8_t bti)
{
GBE_ASSERT(this->binded_n < max_buf_n);
@@ -86,13 +86,13 @@ void GenGpuState::bindBuf(drm_intel_bo *buf, uint32_t offset,
this->setupBTI(buf, internal_offset, size, bti, I965_SURFACEFORMAT_RAW);
}
-void GenGpuState::setStack(uint32_t offset, uint32_t size, uint8_t bti)
+void GenGPUState::setStack(uint32_t offset, uint32_t size, uint8_t bti) /* allocates the stack BO and binds it via bindBuf(); NOTE(review): the drm_intel_bo_alloc() result is not NULL-checked here, unlike allocConstantBuffer() */
{
this->stack_b.bo = drm_intel_bo_alloc(bufmgr, "STACK", size, 64);
this->bindBuf(this->stack_b.bo, offset, 0, size, bti);
}
-bool GenGpuState::stateInit(uint32_t max_threads, uint32_t size_cs_entry, int profiling)
+bool GenGPUState::stateInit(uint32_t max_threads, uint32_t size_cs_entry, int profiling)
{
drm_intel_bo *bo = NULL;
@@ -166,7 +166,7 @@ bool GenGpuState::stateInit(uint32_t max_threads, uint32_t size_cs_entry, int pr
return true;
}
-bool GenGpuState::allocConstantBuffer(uint32_t size, uint8_t bti)
+bool GenGPUState::allocConstantBuffer(uint32_t size, uint8_t bti)
{
this->constant_b.bo = drm_intel_bo_alloc(this->bufmgr, "CONSTANT_BUFFER", size, 64);
if (this->constant_b.bo == NULL)
@@ -177,22 +177,76 @@ bool GenGpuState::allocConstantBuffer(uint32_t size, uint8_t bti)
}
/*****************************************************************************************
- *************************************** GEN7 ******************************************
+ ************************************** GEN7 *******************************************
*****************************************************************************************/
-void Gen7GpuState::selectPipeline(void)
+/* Emit a one-dword command (CMD_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU)
+ * into the batch buffer. */
+void Gen7GPUState::selectPipeline(void)
{
+  BEGIN_BATCH(this->batchbuf, 1);
+  OUT_BATCH(this->batchbuf, CMD_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU);
+  /* ADVANCE_BATCH() currently expands to nothing, but pass the same batch
+   * object as BEGIN_BATCH/OUT_BATCH so the call stays valid if the macro
+   * ever starts using its argument. */
+  ADVANCE_BATCH(this->batchbuf);
}
-void Gen7GpuState::getCacheCtrl(void)
+/* Cache control value used by the Gen7 state setup; always cc_llc_l3. */
+uint32_t Gen7GPUState::getCacheCtrl(void)
{
+  const uint32_t default_cc = cc_llc_l3;
+  return default_cc;
}
-void Gen7GpuState::setBaseAddress(void)
+void Gen7GPUState::setBaseAddress(void) /* emits a 10-dword STATE_BASE_ADDRESS command into the batch buffer */
{
+ const uint32_t def_cc = this->getCacheCtrl(); /* default Cache Control value */
+ BEGIN_BATCH(this->batchbuf, 10); /* 1 command dword + the 9 dwords emitted below */
+ OUT_BATCH(this->batchbuf, CMD_STATE_BASE_ADDRESS | 8); /* presumably the length field is (total dwords - 2) -- confirm against the PRM */
+ /* 0, Gen State Mem Obj CC, Stateless Mem Obj CC, Stateless Access Write Back */
+ /* General State Base Addr */
+ OUT_BATCH(this->batchbuf, 0 | (def_cc << 8) | (def_cc << 4) | (0 << 3)| BASE_ADDRESS_MODIFY);
+ /* 0, State Mem Obj CC */
+ /* We use a state base address for the surface heap since IVB clamps the
+ * binding table pointer at 11 bits. So, we cannot use pointers directly while
+ * using the surface heap.
+ */
+ GBE_ASSERT(this->aux_offset.surface_heap_offset % 4096 == 0); /* the surface heap must start on a 4096-byte boundary */
+ OUT_RELOC(this->batchbuf, this->aux_buf.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ this->aux_offset.surface_heap_offset +
+ (0 | (def_cc << 8) | (def_cc << 4) | (0 << 3)| BASE_ADDRESS_MODIFY));
+
+ OUT_BATCH(this->batchbuf, 0 | (def_cc << 8) | BASE_ADDRESS_MODIFY); /* Dynamic State Base Addr */
+
+ OUT_BATCH(this->batchbuf, 0 | (def_cc << 8) | BASE_ADDRESS_MODIFY); /* Indirect Obj Base Addr */
+ OUT_BATCH(this->batchbuf, 0 | (def_cc << 8) | BASE_ADDRESS_MODIFY); /* Instruction Base Addr */
+ OUT_BATCH(this->batchbuf, 0 | BASE_ADDRESS_MODIFY); /* NOTE(review): unlabeled; presumably General State Access Upper Bound -- confirm */
+ /* According to mesa i965 driver code, we must set the dynamic state access upper bound
+ * to a valid bound value, otherwise, the border color pointer may be rejected and you
+ * may get incorrect border color. This is a known hardware bug. */
+ OUT_BATCH(this->batchbuf, 0xfffff000 | BASE_ADDRESS_MODIFY); /* Dynamic State Access Upper Bound, per the comment above */
+ OUT_BATCH(this->batchbuf, 0 | BASE_ADDRESS_MODIFY); /* NOTE(review): unlabeled; presumably Indirect Object Access Upper Bound -- confirm */
+ OUT_BATCH(this->batchbuf, 0 | BASE_ADDRESS_MODIFY); /* NOTE(review): unlabeled; presumably Instruction Access Upper Bound -- confirm */
+ ADVANCE_BATCH(this->batchbuf);
}
-void Gen7GpuState::setupBTI(drm_intel_bo *buf, uint32_t internal_offset,
+/* Program surface slot `index` of the surface heap as a buffer surface over
+ * `buf`, point binding-table entry `index` at it, and emit the relocation
+ * for the surface base address.
+ *
+ *   buf             - buffer object the surface refers to
+ *   internal_offset - offset added to buf->offset to form the base address
+ *   size            - surface size; must not exceed (2ul << 30)
+ *   index           - surface / binding-table slot to fill in
+ *   format          - value stored in ss0.surface_format
+ */
+void Gen7GPUState::setupBTI(drm_intel_bo *buf, uint32_t internal_offset,
size_t size, unsigned char index, uint32_t format)
{
+  GBE_ASSERT(size <= (2ul<<30));
+
+  /* The surface state stores (size - 1), split across width/height/depth. */
+  const size_t s = size - 1;
+  surface_heap *heap = (surface_heap *)((char*)this->aux_buf.bo->virt + this->aux_offset.surface_heap_offset);
+  gen7_surface_state_t *ss = (gen7_surface_state_t *) &heap->surface[index * sizeof(gen7_surface_state_t)];
+  memset(ss, 0, sizeof(gen7_surface_state_t));
+  ss->ss0.surface_type = I965_SURFACE_BUFFER;
+  ss->ss0.surface_format = format;
+  ss->ss2.width = s & 0x7f; /* bits 6:0 of sz */
+  /* Per bspec, for I965_SURFACE_BUFFER with RAW format the size must be a
+   * multiple of 4 bytes, i.e. the two low bits of (size - 1) are both set.
+   * Uses GBE_ASSERT like the rest of this file. */
+  GBE_ASSERT(format != I965_SURFACEFORMAT_RAW || (ss->ss2.width & 0x03) == 3);
+  ss->ss2.height = (s >> 7) & 0x3fff; /* bits 20:7 of sz */
+  ss->ss3.depth = (s >> 21) & 0x3ff; /* bits 30:21 of sz */
+  ss->ss5.cache_control = this->getCacheCtrl();
+  heap->binding_table[index] = offsetof(surface_heap, surface) + index * sizeof(gen7_surface_state_t);
+
+  /* Store the currently known address, then register a relocation against
+   * the ss1 dword of this surface state inside the aux buffer. */
+  ss->ss1.base_addr = buf->offset + internal_offset;
+  dri_bo_emit_reloc(this->aux_buf.bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                    internal_offset, this->aux_offset.surface_heap_offset +
+                    heap->binding_table[index] + offsetof(gen7_surface_state_t, ss1), buf);
}
diff --git a/backend/src/driver/cl_gen_gpu_state.h b/backend/src/driver/cl_gen_gpu_state.h
index 1aee529f..35ae5378 100644
--- a/backend/src/driver/cl_gen_gpu_state.h
+++ b/backend/src/driver/cl_gen_gpu_state.h
@@ -80,7 +80,23 @@ struct GenBatchbuffer {
}
};
-struct GenGpuState {
+/* Batch emission helpers. `b` is a pointer to a batch buffer object that
+ * provides requireSpace(), emitDword() and emitReloc(). Every macro
+ * parameter is parenthesized so that any expression may be passed. */
+
+/* Request space for n dwords (n * 4 bytes) from the batch. */
+#define BEGIN_BATCH(b, n) do { \
+  (b)->requireSpace((n) * 4); \
+} while (0)
+
+/* Emit the single dword d. */
+#define OUT_BATCH(b, d) do { \
+  (b)->emitDword((d)); \
+} while (0)
+
+/* Emit a relocation against bo. The delta check is vacuous when delta has
+ * an unsigned type. */
+#define OUT_RELOC(b, bo, read_domains, write_domain, delta) do { \
+  GBE_ASSERT((delta) >= 0); \
+  (b)->emitReloc((bo), (read_domains), (write_domain), (delta)); \
+} while (0)
+
+/* Closes a BEGIN_BATCH sequence. Expands to nothing; b is not evaluated. */
+#define ADVANCE_BATCH(b) do { } while (0)
+
+
+struct GenGPUState {
static const int max_buf_n = 128;
static const int max_img_n = 128;
static const int max_sampler_n = 16;
@@ -137,8 +153,8 @@ struct GenGpuState {
uint32_t max_threads; /* max threads requested by the user */
- GenGpuState(dri_bufmgr *bufmgr, drm_intel_context *ctx);
- ~GenGpuState(void);
+ GenGPUState(dri_bufmgr *bufmgr, drm_intel_context *ctx);
+ ~GenGPUState(void);
void newBatchbuf(size_t sz)
{
this->batchbuf = GBE_NEW(GenBatchbuffer, bufmgr, ctx, sz);
@@ -152,15 +168,15 @@ struct GenGpuState {
bool allocConstantBuffer(uint32_t size, uint8_t bti);
virtual void selectPipeline(void) = 0;
- virtual void getCacheCtrl(void) = 0;
+ virtual uint32_t getCacheCtrl(void) = 0;
virtual void setBaseAddress(void) = 0;
virtual void setupBTI(drm_intel_bo *buf, uint32_t internal_offset,
size_t size, unsigned char index, uint32_t format) = 0;
};
-struct Gen7GpuState : public GenGpuState {
+struct Gen7GPUState : public GenGPUState {
virtual void selectPipeline(void);
- virtual void getCacheCtrl(void);
+ virtual uint32_t getCacheCtrl(void);
virtual void setBaseAddress(void);
virtual void setupBTI(drm_intel_bo *buf, uint32_t internal_offset,
size_t size, unsigned char index, uint32_t format);