diff options
Diffstat (limited to 'backend/src')
-rw-r--r-- | backend/src/driver/cl_gen_device_id.cpp | 28 | ||||
-rw-r--r-- | backend/src/driver/cl_gen_gpu_defines.h | 40 | ||||
-rw-r--r-- | backend/src/driver/cl_gen_gpu_state.cpp | 80 | ||||
-rw-r--r-- | backend/src/driver/cl_gen_gpu_state.h | 28 |
4 files changed, 142 insertions, 34 deletions
diff --git a/backend/src/driver/cl_gen_device_id.cpp b/backend/src/driver/cl_gen_device_id.cpp index a36c87e4..6eec6e14 100644 --- a/backend/src/driver/cl_gen_device_id.cpp +++ b/backend/src/driver/cl_gen_device_id.cpp @@ -59,7 +59,7 @@ static cl_device_id gen_device; static _cl_extensions gen_device_extensions; static char gen_ext_string[CL_MAX_EXTENSION_LENGTH]; -struct GpgpuDevice { +struct GenGPUDevice { dri_bufmgr *bufmgr; int fd; bool from_x11; @@ -68,19 +68,19 @@ struct GpgpuDevice { cl_uint max_thread_per_unit; cl_uint sub_slice_count; cl_ulong scratch_mem_size; - GpgpuDevice(); - ~GpgpuDevice(); + GenGPUDevice(); + ~GenGPUDevice(); }; -static GpgpuDevice* getGPUDevice(cl_device_id device) +static GenGPUDevice* getGPUDevice(cl_device_id device) { - return reinterpret_cast<GpgpuDevice*>(getGenDevicePrivate(device)); + return reinterpret_cast<GenGPUDevice*>(getGenDevicePrivate(device)); } /* just used for maximum relocation number in drm_intel */ #define BATCH_SIZE 0x4000 -static int gpgpuDeviceInit(GpgpuDevice *gpu) +static int gpgpuDeviceInit(GenGPUDevice *gpu) { gpu->bufmgr = drm_intel_bufmgr_gem_init(gpu->fd, BATCH_SIZE); if (!gpu->bufmgr) @@ -116,7 +116,7 @@ static int gpgpuDeviceInit(GpgpuDevice *gpu) return 1; } -static int gpgpuDeviceOpenRender(GpgpuDevice *gpu) +static int gpgpuDeviceOpenRender(GenGPUDevice *gpu) { int cardi; int dev_fd; @@ -149,7 +149,7 @@ static int gpgpuDeviceOpenRender(GpgpuDevice *gpu) return 0; } -static int gpgpuDeviceOpenMaster(GpgpuDevice *gpu) +static int gpgpuDeviceOpenMaster(GenGPUDevice *gpu) { int cardi; int dev_fd, ret; @@ -195,7 +195,7 @@ static int gpgpuDeviceOpenMaster(GpgpuDevice *gpu) return 0; } -static int gpgpuDeviceOpenX11(GpgpuDevice *gpu) +static int gpgpuDeviceOpenX11(GenGPUDevice *gpu) { gpu->fd = dri2OpenX11(); if (gpu->fd >= 0) { @@ -214,7 +214,7 @@ static int gpgpuDeviceOpenX11(GpgpuDevice *gpu) return 0; } -static void gpgpuDeviceClose(GpgpuDevice *gpu) +static void gpgpuDeviceClose(GenGPUDevice *gpu) { if (gpu->bufmgr) drm_intel_bufmgr_destroy(gpu->bufmgr); @@ -231,7 +231,7 @@ static void gpgpuDeviceClose(GpgpuDevice *gpu) } } -GpgpuDevice::GpgpuDevice(void) : bufmgr(NULL), fd(-1), from_x11(0), device_id(0), +GenGPUDevice::GenGPUDevice(void) : bufmgr(NULL), fd(-1), from_x11(0), device_id(0), gen_ver(0), max_thread_per_unit(0), sub_slice_count(0), scratch_mem_size(0) { if (!gpgpuDeviceOpenX11(this) && !gpgpuDeviceOpenRender(this) @@ -246,12 +246,12 @@ GpgpuDevice::GpgpuDevice(void) : bufmgr(NULL), fd(-1), from_x11(0), device_id(0) } } -GpgpuDevice::~GpgpuDevice(void) +GenGPUDevice::~GenGPUDevice(void) { gpgpuDeviceClose(this); } -static void initGenDevice(GpgpuDevice* gpu) +static void initGenDevice(GenGPUDevice* gpu) { int device_id = gpu->device_id; @@ -723,7 +723,7 @@ cl_int GenDriverInit(cl_platform_id platform) return CL_SUCCESS; } - GpgpuDevice* gpuDev = GBE_NEW(GpgpuDevice); + GenGPUDevice* gpuDev = GBE_NEW(GenGPUDevice); if (gpuDev->gen_ver < 7) { gen_device = NULL; GBE_FREE(gpuDev); diff --git a/backend/src/driver/cl_gen_gpu_defines.h b/backend/src/driver/cl_gen_gpu_defines.h index 0d5b562b..5289e7d2 100644 --- a/backend/src/driver/cl_gen_gpu_defines.h +++ b/backend/src/driver/cl_gen_gpu_defines.h @@ -352,5 +352,43 @@ #define GEN_MAX_VME_STATES 8 #define GEN_MAX_IF_DESC 32 -#endif /* __CL_GEN_GPU_DEFINESI_H__ */ +/* Cache control options for gen7 */ +typedef enum cl_cache_control { + cc_gtt = 0x0, + cc_l3 = 0x1, + cc_llc = 0x2, + cc_llc_l3 = 0x3 +} cl_cache_control; + +/* L3 Cache control options for gen75 */ +typedef enum cl_l3_cache_control { + l3cc_uc = 0x0, + l3cc_ec = 0x1 +} cl_l3_cache_control; + +/* LLCCC Cache control options for gen75 */ +typedef enum cl_llccc_cache_control { + llccc_pte = 0x0<<1, + llccc_uc = 0x1<<1, + llccc_ec = 0x2<<1, + llccc_ucllc = 0x3<<1 +} cl_llccc_cache_control; + +/* Target Cache control options for gen8 */ +typedef enum cl_target_cache_control { + tcc_ec_only = 0x0<<3, + tcc_llc_only = 0x1<<3, + tcc_llc_ec = 0x2<<3, + tcc_llc_ec_l3 = 0x3<<3 +} cl_target_cache_control; + +/* Memory type LLC/ELLC Cache control options for gen8 */ +typedef enum cl_mtllc_cache_control { + mtllc_pte = 0x0<<5, + mtllc_none = 0x1<<5, + mtllc_wt = 0x2<<5, + mtllc_wb = 0x3<<5 +} cl_mtllc_cache_control; + +#endif /* __CL_GEN_GPU_DEFINESI_H__ */ diff --git a/backend/src/driver/cl_gen_gpu_state.cpp b/backend/src/driver/cl_gen_gpu_state.cpp index 692874e4..07c5bbe6 100644 --- a/backend/src/driver/cl_gen_gpu_state.cpp +++ b/backend/src/driver/cl_gen_gpu_state.cpp @@ -25,14 +25,14 @@ struct surface_heap { char surface[256*sizeof(gen_surface_state_t)]; }; -GenGpuState::GenGpuState(dri_bufmgr *bufmgr, drm_intel_context *ctx) +GenGPUState::GenGPUState(dri_bufmgr *bufmgr, drm_intel_context *ctx) { - memset(this, 0, sizeof(GenGpuState)); + memset(this, 0, sizeof(GenGPUState)); this->bufmgr = bufmgr; this->ctx = ctx; } -GenGpuState::~GenGpuState(void) +GenGPUState::~GenGPUState(void) { if(this->time_stamp_b.bo) { drm_intel_bo_unreference(this->time_stamp_b.bo); @@ -69,13 +69,13 @@ GenGpuState::~GenGpuState(void) } } -void GenGpuState::sync(void) +void GenGPUState::sync(void) { if (batchbuf) drm_intel_bo_wait_rendering((drm_intel_bo *)batchbuf->buffer); } -void GenGpuState::bindBuf(drm_intel_bo *buf, uint32_t offset, +void GenGPUState::bindBuf(drm_intel_bo *buf, uint32_t offset, uint32_t internal_offset, size_t size, uint8_t bti) { GBE_ASSERT(this->binded_n < max_buf_n); @@ -86,13 +86,13 @@ void GenGpuState::bindBuf(drm_intel_bo *buf, uint32_t offset, this->setupBTI(buf, internal_offset, size, bti, I965_SURFACEFORMAT_RAW); } -void GenGpuState::setStack(uint32_t offset, uint32_t size, uint8_t bti) +void GenGPUState::setStack(uint32_t offset, uint32_t size, uint8_t bti) { this->stack_b.bo = drm_intel_bo_alloc(bufmgr, "STACK", size, 64); this->bindBuf(this->stack_b.bo, offset, 0, size, bti); } -bool GenGpuState::stateInit(uint32_t max_threads, uint32_t size_cs_entry, int profiling) +bool GenGPUState::stateInit(uint32_t max_threads, uint32_t size_cs_entry, int profiling) { drm_intel_bo *bo = NULL; @@ -166,7 +166,7 @@ bool GenGpuState::stateInit(uint32_t max_threads, uint32_t size_cs_entry, int pr return true; } -bool GenGpuState::allocConstantBuffer(uint32_t size, uint8_t bti) +bool GenGPUState::allocConstantBuffer(uint32_t size, uint8_t bti) { this->constant_b.bo = drm_intel_bo_alloc(this->bufmgr, "CONSTANT_BUFFER", size, 64); if (this->constant_b.bo == NULL) @@ -177,22 +177,76 @@ bool GenGpuState::allocConstantBuffer(uint32_t size, uint8_t bti) } /***************************************************************************************** - *************************************** GEN7 ****************************************** + ************************************** GEN7 ******************************************* *****************************************************************************************/ -void Gen7GpuState::selectPipeline(void) +void Gen7GPUState::selectPipeline(void) { + BEGIN_BATCH(this->batchbuf, 1); + OUT_BATCH(this->batchbuf, CMD_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU); + ADVANCE_BATCH(this->batch); } -void Gen7GpuState::getCacheCtrl(void) +uint32_t Gen7GPUState::getCacheCtrl(void) { + return cc_llc_l3; } -void Gen7GpuState::setBaseAddress(void) +void Gen7GPUState::setBaseAddress(void) { + const uint32_t def_cc = this->getCacheCtrl(); /* default Cache Control value */ + BEGIN_BATCH(this->batchbuf, 10); + OUT_BATCH(this->batchbuf, CMD_STATE_BASE_ADDRESS | 8); + /* 0, Gen State Mem Obj CC, Stateless Mem Obj CC, Stateless Access Write Back */ + /* General State Base Addr */ + OUT_BATCH(this->batchbuf, 0 | (def_cc << 8) | (def_cc << 4) | (0 << 3)| BASE_ADDRESS_MODIFY); + /* 0, State Mem Obj CC */ + /* We use a state base address for the surface heap since IVB clamp the + * binding table pointer at 11 bits. So, we cannot use pointers directly while + * using the surface heap + */ + GBE_ASSERT(this->aux_offset.surface_heap_offset % 4096 == 0); + OUT_RELOC(this->batchbuf, this->aux_buf.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + this->aux_offset.surface_heap_offset + + (0 | (def_cc << 8) | (def_cc << 4) | (0 << 3)| BASE_ADDRESS_MODIFY)); + + OUT_BATCH(this->batchbuf, 0 | (def_cc << 8) | BASE_ADDRESS_MODIFY); /* Dynamic State Base Addr */ + + OUT_BATCH(this->batchbuf, 0 | (def_cc << 8) | BASE_ADDRESS_MODIFY); /* Indirect Obj Base Addr */ + OUT_BATCH(this->batchbuf, 0 | (def_cc << 8) | BASE_ADDRESS_MODIFY); /* Instruction Base Addr */ + OUT_BATCH(this->batchbuf, 0 | BASE_ADDRESS_MODIFY); + /* According to mesa i965 driver code, we must set the dynamic state access upper bound + * to a valid bound value, otherwise, the border color pointer may be rejected and you + * may get incorrect border color. This is a known hardware bug. */ + OUT_BATCH(this->batchbuf, 0xfffff000 | BASE_ADDRESS_MODIFY); + OUT_BATCH(this->batchbuf, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(this->batchbuf, 0 | BASE_ADDRESS_MODIFY); + ADVANCE_BATCH(this->batchbuf); } -void Gen7GpuState::setupBTI(drm_intel_bo *buf, uint32_t internal_offset, +void Gen7GPUState::setupBTI(drm_intel_bo *buf, uint32_t internal_offset, size_t size, unsigned char index, uint32_t format) { + GBE_ASSERT(size <= (2ul<<30)); + + size_t s = size - 1; + surface_heap *heap = (surface_heap *)((char*)this->aux_buf.bo->virt + this->aux_offset.surface_heap_offset); + gen7_surface_state_t *ss0 = (gen7_surface_state_t *) &heap->surface[index * sizeof(gen7_surface_state_t)]; + memset(ss0, 0, sizeof(gen7_surface_state_t)); + ss0->ss0.surface_type = I965_SURFACE_BUFFER; + ss0->ss0.surface_format = format; + ss0->ss2.width = s & 0x7f; /* bits 6:0 of sz */ + // Per bspec, I965_SURFACE_BUFFER and RAW format, size must be a multiple of 4 byte. + if(format == I965_SURFACEFORMAT_RAW) + assert((ss0->ss2.width & 0x03) == 3); + ss0->ss2.height = (s >> 7) & 0x3fff; /* bits 20:7 of sz */ + ss0->ss3.depth = (s >> 21) & 0x3ff; /* bits 30:21 of sz */ + ss0->ss5.cache_control = this->getCacheCtrl(); + heap->binding_table[index] = offsetof(surface_heap, surface) + index * sizeof(gen7_surface_state_t); + + ss0->ss1.base_addr = buf->offset + internal_offset; + dri_bo_emit_reloc(this->aux_buf.bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + internal_offset, this->aux_offset.surface_heap_offset + + heap->binding_table[index] + offsetof(gen7_surface_state_t, ss1), buf); } diff --git a/backend/src/driver/cl_gen_gpu_state.h b/backend/src/driver/cl_gen_gpu_state.h index 1aee529f..35ae5378 100644 --- a/backend/src/driver/cl_gen_gpu_state.h +++ b/backend/src/driver/cl_gen_gpu_state.h @@ -80,7 +80,23 @@ struct GenBatchbuffer { } }; -struct GenGpuState { +#define BEGIN_BATCH(b, n) do { \ + b->requireSpace((n) * 4); \ +} while (0) + +#define OUT_BATCH(b, d) do { \ + b->emitDword(d); \ +} while (0) + +#define OUT_RELOC(b, bo, read_domains, write_domain, delta) do { \ + GBE_ASSERT((delta) >= 0); \ + b->emitReloc(bo, read_domains, write_domain, delta); \ +} while (0) + +#define ADVANCE_BATCH(b) do { } while (0) + + +struct GenGPUState { static const int max_buf_n = 128; static const int max_img_n = 128; static const int max_sampler_n = 16; @@ -137,8 +153,8 @@ struct GenGpuState { uint32_t max_threads; /* max threads requested by the user */ - GenGpuState(dri_bufmgr *bufmgr, drm_intel_context *ctx); - ~GenGpuState(void); + GenGPUState(dri_bufmgr *bufmgr, drm_intel_context *ctx); + ~GenGPUState(void); void newBatchbuf(size_t sz) { this->batchbuf = GBE_NEW(GenBatchbuffer, bufmgr, ctx, sz); @@ -152,15 +168,15 @@ struct GenGpuState { bool allocConstantBuffer(uint32_t size, uint8_t bti); virtual void selectPipeline(void) = 0; - virtual void getCacheCtrl(void) = 0; + virtual uint32_t getCacheCtrl(void) = 0; virtual void setBaseAddress(void) = 0; virtual void setupBTI(drm_intel_bo *buf, uint32_t internal_offset, size_t size, unsigned char index, uint32_t format) = 0; }; -struct Gen7GpuState : public GenGpuState { +struct Gen7GPUState : public GenGPUState { virtual void selectPipeline(void); - virtual void getCacheCtrl(void); + virtual uint32_t getCacheCtrl(void); virtual void setBaseAddress(void); virtual void setupBTI(drm_intel_bo *buf, uint32_t internal_offset, size_t size, unsigned char index, uint32_t format); |