diff options
author | Mark Brown <broonie@kernel.org> | 2017-04-25 16:35:35 +0100 |
---|---|---|
committer | Mark Brown <broonie@kernel.org> | 2017-04-25 16:35:35 +0100 |
commit | 9095bf25ea08135a5b74875dd0e3eeaddc4218a0 (patch) | |
tree | a4e6976eab0e7a5b6258281077cd5853c7e69514 /drivers/gpu/drm/i915/i915_gem_render_state.c | |
parent | cdf4275e957c6bad3756e98942341667f1d7de7d (diff) | |
parent | c1ae3cfa0e89fa1a7ecc4c99031f5e9ae99d9201 (diff) |
Merge tag 'v4.11-rc1' into regulator-arizona
Linux 4.11-rc1
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_render_state.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_render_state.c | 188 |
1 files changed, 119 insertions, 69 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 95b7e9afd5f8..7032c542a9b1 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -28,17 +28,19 @@ #include "i915_drv.h" #include "intel_renderstate.h" -struct render_state { +struct intel_render_state { const struct intel_renderstate_rodata *rodata; struct i915_vma *vma; - u32 aux_batch_size; - u32 aux_batch_offset; + u32 batch_offset; + u32 batch_size; + u32 aux_offset; + u32 aux_size; }; static const struct intel_renderstate_rodata * -render_state_get_rodata(const struct drm_i915_gem_request *req) +render_state_get_rodata(const struct intel_engine_cs *engine) { - switch (INTEL_GEN(req->i915)) { + switch (INTEL_GEN(engine->i915)) { case 6: return &gen6_null_state; case 7: @@ -58,34 +60,31 @@ render_state_get_rodata(const struct drm_i915_gem_request *req) * this is sufficient as the null state generator makes the final batch * with two passes to build command and state separately. At this point * the size of both are known and it compacts them by relocating the state - * right after the commands taking care of aligment so we should sufficient + * right after the commands taking care of alignment so we should sufficient * space below them for adding new commands. */ #define OUT_BATCH(batch, i, val) \ do { \ - if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) { \ - ret = -ENOSPC; \ - goto err_out; \ - } \ + if ((i) >= PAGE_SIZE / sizeof(u32)) \ + goto err; \ (batch)[(i)++] = (val); \ } while(0) -static int render_state_setup(struct render_state *so) +static int render_state_setup(struct intel_render_state *so, + struct drm_i915_private *i915) { - struct drm_device *dev = so->vma->vm->dev; const struct intel_renderstate_rodata *rodata = so->rodata; - const bool has_64bit_reloc = INTEL_GEN(dev) >= 8; + struct drm_i915_gem_object *obj = so->vma->obj; unsigned int i = 0, reloc_index = 0; - struct page *page; + unsigned int needs_clflush; u32 *d; int ret; - ret = i915_gem_object_set_to_cpu_domain(so->vma->obj, true); + ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); if (ret) return ret; - page = i915_gem_object_get_dirty_page(so->vma->obj, 0); - d = kmap(page); + d = kmap_atomic(i915_gem_object_get_dirty_page(obj, 0)); while (i < rodata->batch_items) { u32 s = rodata->batch[i]; @@ -93,12 +92,10 @@ static int render_state_setup(struct render_state *so) if (i * 4 == rodata->reloc[reloc_index]) { u64 r = s + so->vma->node.start; s = lower_32_bits(r); - if (has_64bit_reloc) { + if (HAS_64BIT_RELOC(i915)) { if (i + 1 >= rodata->batch_items || - rodata->batch[i + 1] != 0) { - ret = -EINVAL; - goto err_out; - } + rodata->batch[i + 1] != 0) + goto err; d[i++] = s; s = upper_32_bits(r); @@ -110,12 +107,20 @@ static int render_state_setup(struct render_state *so) d[i++] = s; } + if (rodata->reloc[reloc_index] != -1) { + DRM_ERROR("only %d relocs resolved\n", reloc_index); + goto err; + } + + so->batch_offset = so->vma->node.start; + so->batch_size = rodata->batch_items * sizeof(u32); + while (i % CACHELINE_DWORDS) OUT_BATCH(d, i, MI_NOOP); - so->aux_batch_offset = i * sizeof(u32); + so->aux_offset = i * sizeof(u32); - if (HAS_POOLED_EU(dev)) { + if (HAS_POOLED_EU(i915)) { /* * We always program 3x6 pool config but depending upon which * subslice is disabled HW drops down to appropriate config @@ -143,88 +148,133 @@ static int render_state_setup(struct render_state *so) } OUT_BATCH(d, i, MI_BATCH_BUFFER_END); - so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset; - + so->aux_size = i * sizeof(u32) - so->aux_offset; + so->aux_offset += so->batch_offset; /* * Since we are sending length, we need to strictly conform to * all requirements. For Gen2 this must be a multiple of 8. */ - so->aux_batch_size = ALIGN(so->aux_batch_size, 8); - - kunmap(page); - - ret = i915_gem_object_set_to_gtt_domain(so->vma->obj, false); - if (ret) - return ret; - - if (rodata->reloc[reloc_index] != -1) { - DRM_ERROR("only %d relocs resolved\n", reloc_index); - return -EINVAL; - } + so->aux_size = ALIGN(so->aux_size, 8); - return 0; + if (needs_clflush) + drm_clflush_virt_range(d, i * sizeof(u32)); + kunmap_atomic(d); -err_out: - kunmap(page); + ret = i915_gem_object_set_to_gtt_domain(obj, false); +out: + i915_gem_obj_finish_shmem_access(obj); return ret; + +err: + kunmap_atomic(d); + ret = -EINVAL; + goto out; } #undef OUT_BATCH -int i915_gem_render_state_init(struct drm_i915_gem_request *req) +int i915_gem_render_state_init(struct intel_engine_cs *engine) { - struct render_state so; + struct intel_render_state *so; + const struct intel_renderstate_rodata *rodata; struct drm_i915_gem_object *obj; int ret; - if (WARN_ON(req->engine->id != RCS)) - return -ENOENT; + if (engine->id != RCS) + return 0; - so.rodata = render_state_get_rodata(req); - if (!so.rodata) + rodata = render_state_get_rodata(engine); + if (!rodata) return 0; - if (so.rodata->batch_items * 4 > 4096) + if (rodata->batch_items * 4 > PAGE_SIZE) return -EINVAL; - obj = i915_gem_object_create(&req->i915->drm, 4096); - if (IS_ERR(obj)) - return PTR_ERR(obj); + so = kmalloc(sizeof(*so), GFP_KERNEL); + if (!so) + return -ENOMEM; - so.vma = i915_vma_create(obj, &req->i915->ggtt.base, NULL); - if (IS_ERR(so.vma)) { - ret = PTR_ERR(so.vma); - goto err_obj; + obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); + if (IS_ERR(obj)) { + ret = PTR_ERR(obj); + goto err_free; } - ret = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL); - if (ret) + so->vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL); + if (IS_ERR(so->vma)) { + ret = PTR_ERR(so->vma); goto err_obj; + } + + so->rodata = rodata; + engine->render_state = so; + return 0; + +err_obj: + i915_gem_object_put(obj); +err_free: + kfree(so); + return ret; +} + +int i915_gem_render_state_emit(struct drm_i915_gem_request *req) +{ + struct intel_render_state *so; + int ret; + + lockdep_assert_held(&req->i915->drm.struct_mutex); - ret = render_state_setup(&so); + so = req->engine->render_state; + if (!so) + return 0; + + /* Recreate the page after shrinking */ + if (!so->vma->obj->mm.pages) + so->batch_offset = -1; + + ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH); if (ret) - goto err_unpin; + return ret; + + if (so->vma->node.start != so->batch_offset) { + ret = render_state_setup(so, req->i915); + if (ret) + goto err_unpin; + } - ret = req->engine->emit_bb_start(req, so.vma->node.start, - so.rodata->batch_items * 4, + ret = req->engine->emit_bb_start(req, + so->batch_offset, so->batch_size, I915_DISPATCH_SECURE); if (ret) goto err_unpin; - if (so.aux_batch_size > 8) { + if (so->aux_size > 8) { ret = req->engine->emit_bb_start(req, - (so.vma->node.start + - so.aux_batch_offset), - so.aux_batch_size, + so->aux_offset, so->aux_size, I915_DISPATCH_SECURE); if (ret) goto err_unpin; } - i915_vma_move_to_active(so.vma, req, 0); + i915_vma_move_to_active(so->vma, req, 0); err_unpin: - i915_vma_unpin(so.vma); -err_obj: - i915_gem_object_put(obj); + i915_vma_unpin(so->vma); return ret; } + +void i915_gem_render_state_fini(struct intel_engine_cs *engine) +{ + struct intel_render_state *so; + struct drm_i915_gem_object *obj; + + so = fetch_and_zero(&engine->render_state); + if (!so) + return; + + obj = so->vma->obj; + + i915_vma_close(so->vma); + __i915_gem_object_release_unless_active(obj); + + kfree(so); +} |