author     Ruiling Song <ruiling.song@intel.com>    2014-07-30 13:59:29 +0800
committer  Zhigang Gong <zhigang.gong@intel.com>    2014-07-30 15:23:43 +0800
commit     e16f34c027b3e7a19355c91d728632d82995b2b0 (patch)
tree       188e10aea38d79b2c8fee9e2c91b569bcf383880 /src/intel
parent     778f56447172c737eceb8aaccca3d2801f242152 (diff)
GBE: Refine bti usage in backend & runtime.
Previously, we simply mapped a single 2GB surface for all memory
accesses, which has an obvious security issue: a user can easily read
or write graphics memory that does not belong to him. To prevent this
kind of behaviour, we now bind each surface to a dedicated bti, and
the HW performs automatic bounds checking: an out-of-bound write is
ignored, and an out-of-bound read simply returns zero.
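
As a hypothetical illustration (not part of this patch), with
per-surface bounds checking a kernel like the following is contained
to its own surface:

/* Illustrative OpenCL kernel; assume 'buf' was created with n ints. */
__kernel void oob_demo(__global int *buf, int n)
{
  if (get_global_id(0) == 0) {
    int v = buf[n + 7];   /* out-of-bound read: HW returns zero */
    buf[n + 7] = v + 1;   /* out-of-bound write: HW ignores it */
  }
}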
The idea behind the patch is that, for a load/store instruction, we
search through the LLVM use-def chain until we find where the address
comes from. The bti is then saved in ir::Instruction and used later
during code generation. In the mixed-pointer case, a single load/store
may access more than one bti.
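
As a hypothetical example of the mixed-pointer case (again, not code
from this patch), the address below can originate from either kernel
argument, so the backend must let the store target more than one bti:

/* Illustrative OpenCL kernel: 'dst' may point into 'a' or 'b', each
 * bound to its own bti, so the store is a multi-bti access. */
__kernel void mixed_ptr(__global int *a, __global int *b, int flag)
{
  int gid = get_global_id(0);
  __global int *dst = flag ? a : b;  /* address from two possible surfaces */
  dst[gid] = gid;                    /* use-def search finds both origins */
}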
To simplify some code, bti '0' is reserved for the constant address
space and '1' for the private address space. The other btis are
assigned automatically by the backend.
Signed-off-by: Ruiling Song <ruiling.song@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
Diffstat (limited to 'src/intel')
-rw-r--r--  src/intel/intel_gpgpu.c  71
1 file changed, 35 insertions(+), 36 deletions(-)
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 7986ab11..e0c00eaf 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -91,7 +91,6 @@ struct intel_gpgpu
   unsigned long img_bitmap;              /* image usage bitmap. */
   unsigned int img_index_base;           /* base index for image surface.*/
-  drm_intel_bo *binded_img[max_img_n + 128];  /* all images binded for the call */
   unsigned long sampler_bitmap;          /* sampler usage bitmap. */
@@ -690,13 +689,13 @@ intel_gpgpu_set_buf_reloc_gen7(intel_gpgpu_t *gpgpu, int32_t index, dri_bo* obj_
 }
 
 static dri_bo*
-intel_gpgpu_alloc_constant_buffer_gen7(intel_gpgpu_t *gpgpu, uint32_t size)
+intel_gpgpu_alloc_constant_buffer_gen7(intel_gpgpu_t *gpgpu, uint32_t size, uint8_t bti)
 {
   uint32_t s = size - 1;
   assert(size != 0);
 
   surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
-  gen7_surface_state_t *ss2 = (gen7_surface_state_t *) heap->surface[2];
+  gen7_surface_state_t *ss2 = (gen7_surface_state_t *) heap->surface[bti];
   memset(ss2, 0, sizeof(gen7_surface_state_t));
   ss2->ss0.surface_type = I965_SURFACE_BUFFER;
   ss2->ss0.surface_format = I965_SURFACEFORMAT_R32G32B32A32_UINT;
@@ -704,7 +703,7 @@ intel_gpgpu_alloc_constant_buffer_gen7(intel_gpgpu_t *gpgpu, uint32_t size)
   ss2->ss2.height = (s >> 7) & 0x3fff;  /* bits 20:7 of sz */
   ss2->ss3.depth  = (s >> 21) & 0x3ff;  /* bits 30:21 of sz */
   ss2->ss5.cache_control = cl_gpgpu_get_cache_ctrl();
-  heap->binding_table[2] = offsetof(surface_heap_t, surface) + 2* sizeof(gen7_surface_state_t);
+  heap->binding_table[bti] = offsetof(surface_heap_t, surface) + bti* sizeof(gen7_surface_state_t);
 
   if(gpgpu->constant_b.bo)
     dri_bo_unreference(gpgpu->constant_b.bo);
@@ -717,20 +716,20 @@ intel_gpgpu_alloc_constant_buffer_gen7(intel_gpgpu_t *gpgpu, uint32_t size)
                     I915_GEM_DOMAIN_RENDER,
                     0,
                     gpgpu->aux_offset.surface_heap_offset +
-                    heap->binding_table[2] +
+                    heap->binding_table[bti] +
                     offsetof(gen7_surface_state_t, ss1),
                     gpgpu->constant_b.bo);
   return gpgpu->constant_b.bo;
 }
 
 static dri_bo*
-intel_gpgpu_alloc_constant_buffer_gen75(intel_gpgpu_t *gpgpu, uint32_t size)
+intel_gpgpu_alloc_constant_buffer_gen75(intel_gpgpu_t *gpgpu, uint32_t size, uint8_t bti)
 {
   uint32_t s = size - 1;
   assert(size != 0);
 
   surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
-  gen7_surface_state_t *ss2 = (gen7_surface_state_t *) heap->surface[2];
+  gen7_surface_state_t *ss2 = (gen7_surface_state_t *) heap->surface[bti];
   memset(ss2, 0, sizeof(gen7_surface_state_t));
   ss2->ss0.surface_type = I965_SURFACE_BUFFER;
   ss2->ss0.surface_format = I965_SURFACEFORMAT_R32G32B32A32_UINT;
@@ -742,7 +741,7 @@ intel_gpgpu_alloc_constant_buffer_gen75(intel_gpgpu_t *gpgpu, uint32_t size)
   ss2->ss7.shader_g = I965_SURCHAN_SELECT_GREEN;
   ss2->ss7.shader_b = I965_SURCHAN_SELECT_BLUE;
   ss2->ss7.shader_a = I965_SURCHAN_SELECT_ALPHA;
-  heap->binding_table[2] = offsetof(surface_heap_t, surface) + 2* sizeof(gen7_surface_state_t);
+  heap->binding_table[bti] = offsetof(surface_heap_t, surface) + bti* sizeof(gen7_surface_state_t);
 
   if(gpgpu->constant_b.bo)
     dri_bo_unreference(gpgpu->constant_b.bo);
@@ -755,36 +754,39 @@ intel_gpgpu_alloc_constant_buffer_gen75(intel_gpgpu_t *gpgpu, uint32_t size)
                     I915_GEM_DOMAIN_RENDER,
                     0,
                     gpgpu->aux_offset.surface_heap_offset +
-                    heap->binding_table[2] +
+                    heap->binding_table[bti] +
                     offsetof(gen7_surface_state_t, ss1),
                     gpgpu->constant_b.bo);
   return gpgpu->constant_b.bo;
 }
 
-
-/* Map address space with two 2GB surfaces. One surface for untyped message and
- * one surface for byte scatters / gathers. Actually the HW does not require two
- * surfaces but Fulsim complains
- */
 static void
-intel_gpgpu_map_address_space(intel_gpgpu_t *gpgpu)
+intel_gpgpu_setup_bti(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t internal_offset, uint32_t size, unsigned char index)
 {
+  uint32_t s = size - 1;
   surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
-  gen7_surface_state_t *ss0 = (gen7_surface_state_t *) heap->surface[0];
-  gen7_surface_state_t *ss1 = (gen7_surface_state_t *) heap->surface[1];
+  gen7_surface_state_t *ss0 = (gen7_surface_state_t *) heap->surface[index];
   memset(ss0, 0, sizeof(gen7_surface_state_t));
-  memset(ss1, 0, sizeof(gen7_surface_state_t));
-  ss1->ss0.surface_type = ss0->ss0.surface_type = I965_SURFACE_BUFFER;
-  ss1->ss0.surface_format = ss0->ss0.surface_format = I965_SURFACEFORMAT_RAW;
-  ss1->ss2.width  = ss0->ss2.width  = 127;   /* bits 6:0 of sz */
-  ss1->ss2.height = ss0->ss2.height = 16383; /* bits 20:7 of sz */
-  ss0->ss3.depth  = 1023; /* bits 30:21 of sz */
-  ss1->ss3.depth  = 1023; /* bits 30:21 of sz */
-  ss1->ss5.cache_control = ss0->ss5.cache_control = cl_gpgpu_get_cache_ctrl();
-  heap->binding_table[0] = offsetof(surface_heap_t, surface);
-  heap->binding_table[1] = sizeof(gen7_surface_state_t) + offsetof(surface_heap_t, surface);
+  ss0->ss0.surface_type = I965_SURFACE_BUFFER;
+  ss0->ss0.surface_format = I965_SURFACEFORMAT_RAW;
+  ss0->ss2.width  = s & 0x7f;           /* bits 6:0 of sz */
+  ss0->ss2.height = (s >> 7) & 0x3fff;  /* bits 20:7 of sz */
+  ss0->ss3.depth  = (s >> 21) & 0x3ff;  /* bits 30:21 of sz */
+  ss0->ss5.cache_control = cl_gpgpu_get_cache_ctrl();
+  heap->binding_table[index] = offsetof(surface_heap_t, surface) + index * sizeof(gen7_surface_state_t);
+
+  ss0->ss1.base_addr = buf->offset + internal_offset;
+  dri_bo_emit_reloc(gpgpu->aux_buf.bo,
+                    I915_GEM_DOMAIN_RENDER,
+                    I915_GEM_DOMAIN_RENDER,
+                    internal_offset,
+                    gpgpu->aux_offset.surface_heap_offset +
+                    heap->binding_table[index] +
+                    offsetof(gen7_surface_state_t, ss1),
+                    buf);
 }
+
 static int
 intel_is_surface_array(cl_mem_object_type type)
 {
@@ -863,7 +865,6 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
   }
   ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */
   intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo, obj_bo_offset);
-  gpgpu->binded_img[index - gpgpu->img_index_base] = obj_bo;
 
   assert(index < GEN_MAX_SURFACES);
 }
@@ -884,7 +885,6 @@ intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu,
   surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
   gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index];
   memset(ss, 0, sizeof(*ss));
-  ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
 
   if (index > 128 + 2 && type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
     ss->ss0.surface_type = I965_SURFACE_2D;
@@ -916,20 +916,20 @@ intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu,
   }
   ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */
   intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo, obj_bo_offset);
-  gpgpu->binded_img[index - gpgpu->img_index_base] = obj_bo;
 
   assert(index < GEN_MAX_SURFACES);
 }
 
 static void
 intel_gpgpu_bind_buf(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t offset,
-                     uint32_t internal_offset, uint32_t cchint)
+                     uint32_t internal_offset, uint32_t size, uint8_t bti)
 {
   assert(gpgpu->binded_n < max_buf_n);
   gpgpu->binded_buf[gpgpu->binded_n] = buf;
   gpgpu->target_buf_offset[gpgpu->binded_n] = internal_offset;
   gpgpu->binded_offset[gpgpu->binded_n] = offset;
   gpgpu->binded_n++;
+  intel_gpgpu_setup_bti(gpgpu, buf, internal_offset, size, bti);
 }
 
 static int
@@ -957,11 +957,12 @@ intel_gpgpu_set_scratch(intel_gpgpu_t * gpgpu, uint32_t per_thread_size)
   return 0;
 }
 
 static void
-intel_gpgpu_set_stack(intel_gpgpu_t *gpgpu, uint32_t offset, uint32_t size, uint32_t cchint)
+intel_gpgpu_set_stack(intel_gpgpu_t *gpgpu, uint32_t offset, uint32_t size, uint8_t bti)
 {
   drm_intel_bufmgr *bufmgr = gpgpu->drv->bufmgr;
   gpgpu->stack_b.bo = drm_intel_bo_alloc(bufmgr, "STACK", size, 64);
-  intel_gpgpu_bind_buf(gpgpu, gpgpu->stack_b.bo, offset, 0, cchint);
+
+  intel_gpgpu_bind_buf(gpgpu, gpgpu->stack_b.bo, offset, 0, size, bti);
 }
 
 static void
@@ -1155,7 +1156,6 @@ intel_gpgpu_states_setup(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel)
 {
   gpgpu->ker = kernel;
   intel_gpgpu_build_idrt(gpgpu, kernel);
-  intel_gpgpu_map_address_space(gpgpu);
   dri_bo_unmap(gpgpu->aux_buf.bo);
 }
@@ -1378,8 +1378,7 @@ intel_gpgpu_set_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i, uint32_t size, uint
   }
   memset(bo->virtual, 0, size);
   drm_intel_bo_unmap(bo);
-
-  intel_gpgpu_bind_buf(gpgpu, bo, offset, 0, 0);
+  intel_gpgpu_bind_buf(gpgpu, bo, offset, 0, size, 0);
 
   return 0;
 }
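
As a minimal sketch of how the reworked entry points fit together (the
caller, buffer names and literal bti values here are illustrative
assumptions, not code from this patch):

/* Sketch only: bind one global buffer and the stack with explicit btis.
 * bti 0 and 1 stay reserved for the constant and private address spaces,
 * so the backend would hand out 2 and up for other surfaces. */
static void example_bind(intel_gpgpu_t *gpgpu, drm_intel_bo *buf_bo,
                         uint32_t buf_size, uint32_t stack_size)
{
  /* Each surface now carries its real size, so the HW bounds-checks
   * accesses against it instead of a shared 2GB mapping. */
  intel_gpgpu_bind_buf(gpgpu, buf_bo, /*offset*/ 0, /*internal_offset*/ 0,
                       buf_size, /*bti*/ 2);

  /* The stack allocates its own bo internally and is bound the same way. */
  intel_gpgpu_set_stack(gpgpu, /*offset*/ 0, stack_size, /*bti*/ 3);
}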